From 8292ef10ca93d444f0b89970b852c861ee2c0882 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Thu, 21 Sep 2023 13:53:10 -0500 Subject: [PATCH 1/9] Update image names (#1835) PR updates `rapidsai/ci` references to `rapidsai/ci-conda` Authors: - Jake Awe (https://github.com/AyodeAwe) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/raft/pull/1835 --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 00004c4e4d..107823d5ee 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -62,7 +62,7 @@ jobs: arch: "amd64" branch: ${{ inputs.branch }} build_type: ${{ inputs.build_type || 'branch' }} - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" date: ${{ inputs.date }} node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4437e0dc85..4fa3c5df86 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -62,7 +62,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" wheel-build-pylibraft: needs: checks From 4f0a2d2d6e30eea0c036ca3b531e03e44e760fbe Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Thu, 21 Sep 2023 15:39:33 -0700 Subject: [PATCH 2/9] Add NVTX ranges for cagra search/serialize functions (#1737) * Add NVTX ranges for cagra search/serialize functions * Add warn_non_pool_workspace for cagra build/search Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1737 --- cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh | 2 ++ cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh | 8 +++++++- .../raft/neighbors/detail/cagra/cagra_serialize.cuh | 5 +++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index d19d7e7904..80e964df57 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,7 @@ void build_knn_graph(raft::resources const& res, std::optional build_params = std::nullopt, std::optional search_params = std::nullopt) { + resource::detail::warn_non_pool_workspace(res, "raft::neighbors::cagra::build"); RAFT_EXPECTS(!build_params || build_params->metric == distance::DistanceType::L2Expanded, "Currently only L2Expanded metric is supported"); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 8190817b5b..b484fa55f9 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -22,6 +22,8 @@ #include #include +#include +#include #include #include #include @@ -60,17 +62,21 @@ void search_main(raft::resources const& res, raft::device_matrix_view neighbors, raft::device_matrix_view distances) { + resource::detail::warn_non_pool_workspace(res, "raft::neighbors::cagra::search"); RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", static_cast(index.dataset().extent(0)), static_cast(index.dataset().extent(1))); RAFT_LOG_DEBUG("# query size = %lu, dim = %lu\n", static_cast(queries.extent(0)), static_cast(queries.extent(1))); - RAFT_EXPECTS(queries.extent(1) == index.dim(), "Querise and index dim must 
match"); + RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); if (params.max_queries == 0) { params.max_queries = queries.extent(0); } + common::nvtx::range fun_scope( + "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); + std::unique_ptr> plan = factory::create( res, params, index.dim(), index.graph_degree(), topk); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 2c9cbd2563..234911e15c 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -54,6 +55,8 @@ void serialize(raft::resources const& res, const index& index_, bool include_dataset) { + common::nvtx::range fun_scope("cagra::serialize"); + RAFT_LOG_DEBUG( "Saving CAGRA index, size %zu, dim %u", static_cast(index_.size()), index_.dim()); @@ -113,6 +116,8 @@ void serialize(raft::resources const& res, template auto deserialize(raft::resources const& res, std::istream& is) -> index { + common::nvtx::range fun_scope("cagra::deserialize"); + char dtype_string[4]; is.read(dtype_string, 4); From e1db49dcce59fa141470e4d5a374c9bcb067d7cc Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Fri, 22 Sep 2023 12:15:19 +0200 Subject: [PATCH 3/9] Add ivf-flat notebook (#1758) Example jupyter notebook to demonstrate usage of the IVF-Flat API Authors: - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Artem M. Chirkin (https://github.com/achirkin) - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1758 --- notebooks/ivf_flat_example.ipynb | 674 +++++++++++++++++++++++++++++++ notebooks/tutorial_ivf_pq.ipynb | 42 +- notebooks/utils.py | 103 +++++ 3 files changed, 788 insertions(+), 31 deletions(-) create mode 100644 notebooks/ivf_flat_example.ipynb create mode 100644 notebooks/utils.py diff --git a/notebooks/ivf_flat_example.ipynb b/notebooks/ivf_flat_example.ipynb new file mode 100644 index 0000000000..08b9d78169 --- /dev/null +++ b/notebooks/ivf_flat_example.ipynb @@ -0,0 +1,674 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4f49c5c4-1170-42a7-9d6a-b90acd00c3c3", + "metadata": {}, + "source": [ + "# RAFT IVF Flat Example Notebook" + ] + }, + { + "cell_type": "markdown", + "id": "4bcfe810-f120-422c-b2bb-72cc43d0c4ca", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "This notebook demonstrates how to run approximate nearest neighbor search using RAFT IVF-Flat algorithm.\n", + "It builds and searches an index using a dataset from the ann-benchmarks million-scale datasets, saves/loads the index to disk, and explores important parameters for fine-tuning the search performance and accuracy of the index." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "fe73ada7-7b7f-4005-9440-85428194311b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import cupy as cp\n", + "import numpy as np\n", + "from pylibraft.common import DeviceResources\n", + "from pylibraft.neighbors import ivf_flat\n", + "import matplotlib.pyplot as plt\n", + "import tempfile\n", + "from utils import BenchmarkTimer, calc_recall, load_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "da9e8615-ea9f-4735-b70f-15ccab36c0d9", + "metadata": {}, + "source": [ + "For best performance it is recommended to use an RMM pooling allocator, to minimize the overheads of repeated CUDA allocations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5350e4d9-0993-406a-80af-29538b5677c2", + "metadata": {}, + "outputs": [], + "source": [ + "import rmm\n", + "from rmm.allocators.cupy import rmm_cupy_allocator\n", + "mr = rmm.mr.PoolMemoryResource(\n", + " rmm.mr.CudaMemoryResource(),\n", + " initial_pool_size=2**30\n", + ")\n", + "rmm.mr.set_current_device_resource(mr)\n", + "cp.cuda.set_allocator(rmm_cupy_allocator)" + ] + }, + { + "cell_type": "markdown", + "id": "b0d935f2-ba24-44fc-bdfe-a769b7fcd8e6", + "metadata": {}, + "source": [ + "The following GPU is used for this notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a5daa4b4-96de-4e74-bfd6-505b13595f62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thu Sep 21 02:30:53 2023 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. 
|\n", + "|=========================================+======================+======================|\n", + "| 0 NVIDIA H100 PCIe On | 00000000:41:00.0 Off | 0 |\n", + "| N/A 35C P0 69W / 350W | 1487MiB / 81559MiB | 0% Default |\n", + "| | | Disabled |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| 0 N/A N/A 3940 C /opt/conda/envs/rapids/bin/python 1474MiB |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "# Report the GPU in use\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "id": "88a654cc-6389-4526-a3e6-826de5606a09", + "metadata": {}, + "source": [ + "## Load dataset\n", + "\n", + "The ANN benchmarks website provides the datasets in HDF5 format.\n", + "\n", + "The list of prepared datasets can be found at https://github.com/erikbern/ann-benchmarks/#data-sets" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5f529ad6-b0bd-495c-bf7c-43f10fb6aa14", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The index and data will be saved in /tmp/raft_example\n" + ] + } + ], + "source": [ + "WORK_FOLDER = os.path.join(tempfile.gettempdir(), \"raft_example\")\n", + "f = load_dataset(\"http://ann-benchmarks.com/sift-128-euclidean.hdf5\", work_folder=WORK_FOLDER)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3d68a7db-bcf4-449c-96c3-1e8ab146c84d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded dataset of size (1000000, 128), 0.5 GiB; metric: 'euclidean'.\n", + "Number of test 
queries: 10000\n" + ] + } + ], + "source": [ + "metric = f.attrs['distance']\n", + "\n", + "dataset = cp.array(f['train'])\n", + "queries = cp.array(f['test'])\n", + "gt_neighbors = cp.array(f['neighbors'])\n", + "gt_distances = cp.array(f['distances'])\n", + "\n", + "itemsize = dataset.dtype.itemsize \n", + "\n", + "print(f\"Loaded dataset of size {dataset.shape}, {dataset.size*itemsize/(1<<30):4.1f} GiB; metric: '{metric}'.\")\n", + "print(f\"Number of test queries: {queries.shape[0]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "9f463c50-d1d3-49be-bcfe-952602efa603", + "metadata": {}, + "source": [ + "## Build index\n", + "We set [IndexParams](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.IndexParams) and build the index. The index parameters will be discussed in more detail in later sections of this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "737f8841-93f9-4c8e-b2e1-787d4474ef94", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 120 ms, sys: 5.33 ms, total: 125 ms\n", + "Wall time: 124 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=1024,\n", + " metric=\"euclidean\",\n", + " kmeans_trainset_fraction=0.1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=True\n", + " )\n", + "\n", + "index = ivf_flat.build(build_params, dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "a16a0cf6-3b05-4afd-9bb8-54431e0d7439", + "metadata": {}, + "source": [ + "The index is built. 
We can print some basic information of the index" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1aec7024-6e5d-4d2c-82e6-7b5734aec958", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(type=IVF-FLAT, metric=euclidean, size=1000000, dim=128, n_lists=1024, adaptive_centers=False)\n" + ] + } + ], + "source": [ + "print(index)" + ] + }, + { + "cell_type": "markdown", + "id": "df7d4958-56a3-48ea-bd64-3486fdb57fb7", + "metadata": {}, + "source": [ + "## Search neighbors" + ] + }, + { + "cell_type": "markdown", + "id": "89ba2eaa-4c85-4e1c-b07c-920394e55dce", + "metadata": {}, + "source": [ + "It is recommended to reuse [device resources](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/common/#pylibraft.common.DeviceResources) across multiple invocations of search, since constructing these can be time consuming. We will reuse the resources by passing the same handle to each RAFT API call." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "46e0421b-9335-47a2-8451-a91f56c2f086", + "metadata": {}, + "outputs": [], + "source": [ + "handle = DeviceResources()" + ] + }, + { + "cell_type": "markdown", + "id": "a6365229-18fd-468f-af30-e24b950cbd6e", + "metadata": {}, + "source": [ + "After setting [SearchParams](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.SearchParams) we search for `k=10` neighbors." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "595454e1-7240-4b43-9a73-963d5670b00c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 171 ms, sys: 52.6 ms, total: 224 ms\n", + "Wall time: 236 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "n_queries=10000\n", + "# n_probes is the number of clusters we select in the first (coarse) search step.
This is the only hyper parameter for search.\n", + "search_params = ivf_flat.SearchParams(n_probes=30)\n", + "\n", + "# Search 10 nearest neighbors.\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "# RAFT calls are asynchronous (when handle arg is provided), we need to sync before accessing the results.\n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)" + ] + }, + { + "cell_type": "markdown", + "id": "43d20ca7-7b9e-4046-bb52-640a2744db75", + "metadata": {}, + "source": [ + "The returned arrays have shape {n_queries x 10] and store the distance values and the indices of the searched vectors. We check how accurate the search is. The accuracy of the search is quantified as `recall`, which is a value between 0 and 1 and tells us what fraction of the returned neighbors are actual k nearest neighbors. " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8cd9cd20-ca00-4a35-a0a0-86636521b31a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.97406" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "cde5079c-9777-45a1-9545-cffbcc59988f", + "metadata": {}, + "source": [ + "## Save and load the index\n", + "You can serialize the index to file using [save](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.save), and [load](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.load) it later." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "bf94e45c-e7fb-4aa3-a611-ddaee7ac41ae", + "metadata": {}, + "outputs": [], + "source": [ + "index_file = os.path.join(WORK_FOLDER, \"my_ivf_flat_index.bin\")\n", + "ivf_flat.save(index_file, index)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "1622d9be-be41-4d25-be99-d348c5e54957", + "metadata": {}, + "outputs": [], + "source": [ + "index = ivf_flat.load(index_file)" + ] + }, + { + "cell_type": "markdown", + "id": "15d503e5-05e8-47ce-8501-e13fc512099c", + "metadata": {}, + "source": [ + "## Tune search parameters\n", + "Search has a single hyper parameter: `n_probes`, which describes how many neighboring clusters are searched (probed) for each query. Within a probed cluster, the distance is computed between all the vectors in the cluster and the query point, and the top-k neighbors are selected. Finally, the top-k neighbors are selected from all the neighbor candidates from the probed clusters.\n", + "\n", + "Let's see how search accuracy and latency change when we change the `n_probes` parameter."
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ace0c31f-af75-4352-a438-123a9a03612c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Benchmarking search with n_probes = 10\n", + "recall 0.86625\n", + "Average search time: 0.026 +/- 0.000259 s\n", + "Queries per second (QPS): 384968\n", + "\n", + "Benchmarking search with n_probes = 20\n", + "recall 0.94705\n", + "Average search time: 0.050 +/- 5.43e-05 s\n", + "Queries per second (QPS): 198880\n", + "\n", + "Benchmarking search with n_probes = 30\n", + "recall 0.97406\n", + "Average search time: 0.075 +/- 8.59e-05 s\n", + "Queries per second (QPS): 133954\n", + "\n", + "Benchmarking search with n_probes = 50\n", + "recall 0.99169\n", + "Average search time: 0.123 +/- 4.78e-05 s\n", + "Queries per second (QPS): 80997\n", + "\n", + "Benchmarking search with n_probes = 100\n", + "recall 0.99844\n", + "Average search time: 0.244 +/- 0.000249 s\n", + "Queries per second (QPS): 40934\n", + "\n", + "Benchmarking search with n_probes = 200\n", + "recall 0.99932\n", + "Average search time: 0.468 +/- 0.000367 s\n", + "Queries per second (QPS): 21382\n", + "\n", + "Benchmarking search with n_probes = 500\n", + "recall 0.99933\n", + "Average search time: 1.039 +/- 0.000209 s\n", + "Queries per second (QPS): 9625\n", + "\n", + "Benchmarking search with n_probes = 1024\n", + "recall 0.99935\n", + "Average search time: 0.701 +/- 0.00579 s\n", + "Queries per second (QPS): 14273\n" + ] + } + ], + "source": [ + "n_probes = np.asarray([10, 20, 30, 50, 100, 200, 500, 1024]);\n", + "qps = np.zeros(n_probes.shape);\n", + "recall = np.zeros(n_probes.shape);\n", + "\n", + "for i in range(len(n_probes)):\n", + " print(\"\\nBenchmarking search with n_probes =\", n_probes[i])\n", + " timer = BenchmarkTimer(reps=1, warmup=1)\n", + " for rep in timer.benchmark_runs():\n", + " distances, neighbors = ivf_flat.search(\n", + " 
ivf_flat.SearchParams(n_probes=n_probes[i]),\n", + " index,\n", + " cp.asarray(queries),\n", + " k=10,\n", + " handle=handle,\n", + " )\n", + " handle.sync()\n", + " \n", + " recall[i] = calc_recall(cp.asnumpy(neighbors), gt_neighbors)\n", + " print(\"recall\", recall[i])\n", + "\n", + " timings = np.asarray(timer.timings)\n", + " avg_time = timings.mean()\n", + " std_time = timings.std()\n", + " qps[i] = queries.shape[0] / avg_time\n", + " print(\"Average search time: {0:7.3f} +/- {1:7.3} s\".format(avg_time, std_time))\n", + " print(\"Queries per second (QPS): {0:8.0f}\".format(qps[i]))" + ] + }, + { + "cell_type": "markdown", + "id": "20b2498c-7231-4211-990e-600d5c26a9a1", + "metadata": {}, + "source": [ + "The plots below illustrate how the accuracy (recall) and the throughput (queries per second) depends on the `n_probes` parameter." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1ac370f-91c8-4054-95c7-a749df5f16d2", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(12,3))\n", + "ax = fig.add_subplot(131)\n", + "ax.plot(n_probes, recall,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('n_probes')\n", + "ax.grid()\n", + "ax.set_ylabel('recall (@k=10)')\n", + "\n", + "ax = fig.add_subplot(132)\n", + "ax.plot(n_probes, qps,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('n_probes')\n", + "ax.grid()\n", + "ax.set_ylabel('queries per second');\n", + "\n", + "ax = fig.add_subplot(133)\n", + "ax.plot(recall, qps,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('recall')\n", + "ax.grid()\n", + "ax.set_ylabel('queries per second');\n", + "#ax.set_yscale('log')" + ] + }, + { + "cell_type": "markdown", + "id": "81e7ad6a-bddc-45de-9cce-0fb913f91efe", + "metadata": {}, + "source": [ + "## Adjust build parameters\n", + "### n_lists\n", + "The number of clusters (or lists) is set by the n_list parameter. Let's change it to 100 clusters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "addbfff3-7773-4290-9608-5489edf4886d", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=100,\n", + " metric=\"euclidean\",\n", + " kmeans_trainset_fraction=1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=True\n", + " )\n", + "\n", + "index = ivf_flat.build(build_params, dataset, handle=handle)" + ] + }, + { + "cell_type": "markdown", + "id": "48db27f9-54c8-4dac-839b-af94ada8885f", + "metadata": {}, + "source": [ + "The ratio of n_probes / n_lists will determine how large a fraction of the dataset is searched for each query. The right combination depends on the use case. Here we will search 10 of the clusters for each query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a0149ad-de38-4195-97a5-ce5d5d877036", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "n_queries=10000\n", + "\n", + "search_params = ivf_flat.SearchParams(n_probes=10)\n", + "\n", + "# Search 10 nearest neighbors.\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eedc3ec4-06af-42c5-8cdf-490a5c2bc49a", + "metadata": {}, + "outputs": [], + "source": [ + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "0c44800f-1e9e-4f7b-87fe-0f25e6590faa", + "metadata": {}, + "source": [ + "### trainset_fraction\n", + "During clustering we can sub-sample the dataset. The parameter `trainset_fraction` determines what fraction to use. Often we get good results by using only 1/10th of the dataset for clustering.
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a54d190-64d4-4cd4-a497-365cbffda871", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams( \n", + " n_lists=100, \n", + " metric=\"sqeuclidean\", \n", + " kmeans_trainset_fraction=0.1, \n", + " kmeans_n_iters=20 \n", + " ) \n", + "index = ivf_flat.build(build_params, dataset, handle=handle)" + ] + }, + { + "cell_type": "markdown", + "id": "9d86a213-d6ae-4fca-9082-cb5a4d1dab36", + "metadata": {}, + "source": [ + "We see only a minimal change in the recall" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cc992e8-a5e5-4508-b790-0e934160b660", + "metadata": {}, + "outputs": [], + "source": [ + "search_params = ivf_flat.SearchParams(n_probes=10)\n", + "\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)\n", + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "25289ebc-7d89-4fa6-bc62-e25b6e77750c", + "metadata": {}, + "source": [ + "### Add vectors on build\n", + "Currently you cannot configure how RAFT sub-samples the input. If you want to have a fine control on how the training set is selected, then create the index in two steps:\n", + "1. Define cluster centers on a training set, but do not add any vector to the index\n", + "2. Add vectors to the index (extend)\n", + "\n", + "This workflow shall be familiar to FAISS users. 
Note that raft does not require adding the data in batches, internal batching is used when necessary.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ebcf970-94ed-4825-9885-277bd984b90c", + "metadata": {}, + "outputs": [], + "source": [ + "# subsample the dataset\n", + "n_train = 10000\n", + "train_set = dataset[cp.random.choice(dataset.shape[0], n_train, replace=False),:]\n", + "\n", + "# build using training set\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=1024,\n", + " metric=\"sqeuclidean\",\n", + " kmeans_trainset_fraction=1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=False\n", + " )\n", + "index = ivf_flat.build(build_params, train_set)\n", + "\n", + "print(\"Index before adding vectors\", index)\n", + "\n", + "ivf_flat.extend(index, dataset, cp.arange(dataset.shape[0], dtype=cp.int64))\n", + "\n", + "print(\"Index after adding vectors\", index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "029d48a9-baf7-4263-af43-9e500ef3cce4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorial_ivf_pq.ipynb b/notebooks/tutorial_ivf_pq.ipynb index 6aa8cd6495..397e39bfba 100644 --- a/notebooks/tutorial_ivf_pq.ipynb +++ b/notebooks/tutorial_ivf_pq.ipynb @@ -79,6 +79,7 @@ "from pylibraft.common import DeviceResources\n", "from pylibraft.neighbors import ivf_pq, refine\n", "from adjustText import adjust_text\n", + "from utils import calc_recall, load_dataset\n", "\n", "%matplotlib inline" ] @@ -194,15 +195,18 @@ "cell_type": "code", 
"execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The index and data will be saved in /tmp/raft_example\n" + ] + } + ], "source": [ "DATASET_URL = \"http://ann-benchmarks.com/sift-128-euclidean.hdf5\"\n", - "DATASET_FILENAME = DATASET_URL.split('/')[-1]\n", - "\n", - "## download the dataset\n", - "dataset_path = os.path.join(WORK_FOLDER, DATASET_FILENAME)\n", - "if not os.path.exists(dataset_path):\n", - " urllib.request.urlretrieve(DATASET_URL, dataset_path)" + "f = load_dataset(DATASET_URL)" ] }, { @@ -227,8 +231,6 @@ } ], "source": [ - "f = h5py.File(dataset_path, \"r\")\n", - "\n", "metric = f.attrs['distance']\n", "\n", "dataset = cp.array(f['train'])\n", @@ -456,28 +458,6 @@ } ], "source": [ - "## Check the quality of the prediction (recall)\n", - "def calc_recall(found_indices, ground_truth):\n", - " found_indices = cp.asarray(found_indices)\n", - " bs, k = found_indices.shape\n", - " if bs != ground_truth.shape[0]:\n", - " raise RuntimeError(\n", - " \"Batch sizes do not match {} vs {}\".format(\n", - " bs, ground_truth.shape[0])\n", - " )\n", - " if k > ground_truth.shape[1]:\n", - " raise RuntimeError(\n", - " \"Not enough indices in the ground truth ({} > {})\".format(\n", - " k, ground_truth.shape[1])\n", - " )\n", - " n = 0\n", - " # Go over the batch\n", - " for i in range(bs):\n", - " # Note, ivf-pq does not guarantee the ordered input, hence the use of intersect1d\n", - " n += cp.intersect1d(found_indices[i, :k], ground_truth[i, :k]).size\n", - " recall = n / found_indices.size\n", - " return recall\n", - "\n", "recall_first_try = calc_recall(neighbors, gt_neighbors)\n", "print(f\"Got recall = {recall_first_try} with the default parameters (k = {k}).\")" ] diff --git a/notebooks/utils.py b/notebooks/utils.py new file mode 100644 index 0000000000..1c2e44a6ae --- /dev/null +++ b/notebooks/utils.py @@ -0,0 +1,103 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import cupy as cp +import h5py +import os +import tempfile +import time +import urllib.request + +## Check the quality of the prediction (recall) +def calc_recall(found_indices, ground_truth): + found_indices = cp.asarray(found_indices) + bs, k = found_indices.shape + if bs != ground_truth.shape[0]: + raise RuntimeError( + "Batch sizes do not match {} vs {}".format( + bs, ground_truth.shape[0] + ) + ) + if k > ground_truth.shape[1]: + raise RuntimeError( + "Not enough indices in the ground truth ({} > {})".format( + k, ground_truth.shape[1] + ) + ) + n = 0 + # Go over the batch + for i in range(bs): + # Note, ivf-pq does not guarantee the ordered input, hence the use of intersect1d + n += cp.intersect1d(found_indices[i, :k], ground_truth[i, :k]).size + recall = n / found_indices.size + return recall + + +class BenchmarkTimer: + """Provides a context manager that runs a code block `reps` times + and records results to the instance variable `timings`. Use like: + .. code-block:: python + timer = BenchmarkTimer(reps=5) + for _ in timer.benchmark_runs(): + ... do something ...
+ print(np.min(timer.timings)) + + This class is borrowed from the rapids/cuml benchmark suite + """ + + def __init__(self, reps=1, warmup=0): + self.warmup = warmup + self.reps = reps + self.timings = [] + + def benchmark_runs(self): + for r in range(self.reps + self.warmup): + t0 = time.time() + yield r + t1 = time.time() + # Warm-up iterations are executed but their timings are not recorded. + if r >= self.warmup: + self.timings.append(t1 - t0) + + +def load_dataset(dataset_url, work_folder=None): + """Download dataset from url. It is expected that the dataset contains a hdf5 file in ann-benchmarks format + + Parameters + ---------- + dataset_url address of hdf5 file + work_folder name of the local folder to store the dataset + + """ + # The local file name is the last path component of the given URL. + dataset_filename = dataset_url.split("/")[-1] + + # We'll need to store some data in this tutorial + if work_folder is None: + work_folder = os.path.join(tempfile.gettempdir(), "raft_example") + + if not os.path.exists(work_folder): + os.makedirs(work_folder) + print("The index and data will be saved in", work_folder) + + ## download the dataset + dataset_path = os.path.join(work_folder, dataset_filename) + if not os.path.exists(dataset_path): + urllib.request.urlretrieve(dataset_url, dataset_path) + + f = h5py.File(dataset_path, "r") + + return f From 9785617acde3608ba67451c79b37369b59bf7927 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Fri, 22 Sep 2023 10:55:25 -0400 Subject: [PATCH 4/9] Fix update-version.sh for all pyproject.toml files [skip ci] (#1839) `python/raft-ann-bench/pyproject.toml` was missed in `update-version.sh`. This PR refactors a bit to update all `pyproject.toml` files which will capture future ones as well.
Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Jake Awe (https://github.com/AyodeAwe) --- ci/release/update-version.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 6a7e319f5d..7a69b95da1 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -47,10 +47,6 @@ sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cma sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/pylibraft/pylibraft/__init__.py sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/raft_dask/__init__.py -# Python pyproject.toml updates -sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pylibraft/pyproject.toml -sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/pyproject.toml - # Wheel testing script sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_raft_dask.sh @@ -74,6 +70,7 @@ for FILE in python/*/pyproject.toml; do for DEP in "${DEPENDENCIES[@]}"; do sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE} done + sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" "${FILE}" sed_runner "/\"ucx-py==/ s/==.*\"/==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\"/g" ${FILE} done From d35a0a9e56fa12330dd8becde34b1e1c46c2b600 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 22 Sep 2023 12:45:35 -0500 Subject: [PATCH 5/9] Make RMM a run dependency of the raft-ann-bench conda package (#1838) RMM was listed as a `host` dependency but not `run`, made it so that installing `raft-ann-bench` does not automatically install `RMM` for end users. Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Ray Douglass (https://github.com/raydouglass) - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1838 --- conda/recipes/raft-ann-bench/meta.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index 91d0fdb729..a2ab0af643 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -78,11 +78,11 @@ requirements: - h5py {{ h5py_version }} - benchmark - matplotlib - # rmm is needed to determine if package is gpu-enabled - - rmm ={{ minor_version }} - python - pandas - pyyaml + # rmm is needed to determine if package is gpu-enabled + - rmm ={{ minor_version }} run: - python @@ -104,6 +104,8 @@ requirements: - python - pandas - pyyaml + # rmm is needed to determine if package is gpu-enabled + - rmm ={{ minor_version }} about: home: https://rapids.ai/ license: Apache-2.0 From ba923cc09175c793ada63aa930d8d7d486a98c69 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 24 Sep 2023 22:29:08 -0500 Subject: [PATCH 6/9] raft-ann-bench package fixes for plotting and conf files (#1844) A number of improvements and fixes for the million-scale datasets that are supported by default by the Python benchmarking package.
Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/1844 --- .../src/raft-ann-bench/plot/__main__.py | 41 +- .../run/conf/deep-image-96-inner.json | 1 + .../run/conf/fashion-mnist-784-euclidean.json | 1 + .../run/conf/glove-100-inner.json | 1766 +++++++++++------ .../run/conf/glove-50-inner.json | 1351 +++++++++++++ .../run/conf/mnist-784-euclidean.json | 1 + .../run/conf/nytimes-256-angular.json | 1 + .../run/conf/nytimes-256-inner.json | 1352 +++++++++++++ 8 files changed, 3923 insertions(+), 591 deletions(-) create mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json create mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-inner.json diff --git a/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py index 198d0a2b14..233607c281 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py @@ -254,18 +254,18 @@ def create_plot_build( xn = "k-nn" yn = "qps" - # recall_85 = [-1] * len(linestyles) qps_85 = [-1] * len(linestyles) bt_85 = [0] * len(linestyles) i_85 = [-1] * len(linestyles) - # recall_90 = [-1] * len(linestyles) + qps_90 = [-1] * len(linestyles) bt_90 = [0] * len(linestyles) i_90 = [-1] * len(linestyles) - # recall_95 = [-1] * len(linestyles) + qps_95 = [-1] * len(linestyles) bt_95 = [0] * len(linestyles) i_95 = [-1] * len(linestyles) + data = OrderedDict() colors = OrderedDict() @@ -303,7 +303,7 @@ def mean_y(algo): plt.figure(figsize=(12, 9)) ax = df.plot.bar(rot=0, color=colors) fig = ax.get_figure() - print(f"writing search output to {fn_out}") + print(f"writing build output to {fn_out}") plt.title("Build Time for Highest QPS") plt.suptitle(f"{dataset} k={k} batch_size={batch_size}") plt.ylabel("Build Time (s)") @@ -313,35 +313,22 @@ def mean_y(algo): def 
load_lines(results_path, result_files, method, index_key): results = dict() - linebreaker = "name,iterations" - for result_filename in result_files: if result_filename.endswith(".csv"): with open(os.path.join(results_path, result_filename), "r") as f: lines = f.readlines() lines = lines[:-1] if lines[-1] == "\n" else lines - idx = 0 - for pos, line in enumerate(lines): - if linebreaker in line: - idx = pos - break if method == "build": - if "hnswlib" in result_filename: - key_idx = [2] - else: - key_idx = [10] + key_idx = [2] elif method == "search": - if "hnswlib" in result_filename: - key_idx = [10, 6] - else: - key_idx = [12, 10] + key_idx = [2, 3] - for line in lines[idx + 1 :]: + for line in lines[1:]: split_lines = line.split(",") - algo_name = split_lines[0].split(".")[0].strip('"') - index_name = split_lines[0].split("/")[0].strip('"') + algo_name = split_lines[0] + index_name = split_lines[1] if index_key == "algo": dict_key = algo_name @@ -394,9 +381,7 @@ def main(): ) parser.add_argument( "--dataset-path", - help="path to dataset folder, by default will look in " - "RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets " - "subdirectory from the calling directory", + help="path to dataset folder", default=default_dataset_path, ) parser.add_argument( @@ -460,10 +445,12 @@ def main(): search = args.search search_output_filepath = os.path.join( - args.output_filepath, f"search-{args.dataset}-{k}-{batch_size}.png" + args.output_filepath, + f"search-{args.dataset}-k{k}-batch_size{batch_size}.png", ) build_output_filepath = os.path.join( - args.output_filepath, f"build-{args.dataset}-{k}-{batch_size}.png" + args.output_filepath, + f"build-{args.dataset}-k{k}-batch_size{batch_size}.png", ) search_results = load_all_results( diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json index f1c033e415..ab82405439 100644 --- 
a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json @@ -3,6 +3,7 @@ "name": "deep-image-96-inner", "base_file": "deep-image-96-inner/base.fbin", "query_file": "deep-image-96-inner/query.fbin", + "groundtruth_neighbors_file": "deep-image-96-inner/groundtruth.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json index 65f28fc81a..0efe1fc498 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json @@ -3,6 +3,7 @@ "name": "fashion-mnist-784-euclidean", "base_file": "fashion-mnist-784-euclidean/base.fbin", "query_file": "fashion-mnist-784-euclidean/query.fbin", + "groundtruth_neighbors_file": "fashion-mnist-784-euclidean/groundtruth.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json index 7c95ceb439..54c8bf908c 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json @@ -3,711 +3,1349 @@ "name": "glove-100-inner", "base_file": "glove-100-inner/base.fbin", "query_file": "glove-100-inner/query.fbin", - "groundtruth_neighbors_file": "glove-100-inner/groundtruth.neighbors.ibin", - "distance": "inner_product" + "distance": "euclidean" }, - "search_basic_param": { - "batch_size": 1, - "k": 10 + "batch_size": 5000, + "k": 10, + "run_count": 3 }, - "index": [ { - "name": "hnswlib.M4", - "algo": "hnswlib", - "build_param": {"M":4, "efConstruction":500, "numThreads":4}, - "file": 
"glove-100-inner/hnswlib/M4", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M8", - "algo": "hnswlib", - "build_param": {"M":8, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M8", - "search_params": [ + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-inner/hnswlib/M12", + "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, {"ef":80, "numThreads":1}, {"ef":120, "numThreads":1}, {"ef":200, "numThreads":1}, {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ] + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M12" }, { - "name": "hnswlib.M12", - "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M12", - "search_params": [ + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-inner/hnswlib/M16", + "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, {"ef":80, "numThreads":1}, {"ef":120, "numThreads":1}, {"ef":200, "numThreads":1}, {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ] + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M16" }, { - "name": "hnswlib.M16", - "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M16", - "search_params": [ + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, 
"efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-inner/hnswlib/M24", + "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, {"ef":80, "numThreads":1}, {"ef":120, "numThreads":1}, {"ef":200, "numThreads":1}, {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ] + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M24" }, { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M24", - "search_params": [ + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-inner/hnswlib/M36", + "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, {"ef":80, "numThreads":1}, {"ef":120, "numThreads":1}, {"ef":200, "numThreads":1}, {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M36", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M48", - "algo": "hnswlib", - "build_param": {"M":48, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M48", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - 
] - }, - { - "name": "hnswlib.M64", - "algo": "hnswlib", - "build_param": {"M":64, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M64", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M36" }, + + + + { - "name": "hnswlib.M96", - "algo": "hnswlib", - "build_param": {"M":96, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M96", + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/glove-100-inner/raft_bfknn/bfknn", "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] + { + "probe": 1 + } + ], + "search_result_file": "result/glove-100-inner/raft_bfknn/bfknn" }, { "name": "faiss_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":1024}, - "file": "glove-100-inner/faiss_ivf_flat/nlist1024", + "build_param": { + "nlist": 1024 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist1024", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist1024" }, { "name": "faiss_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":2048}, - "file": 
"glove-100-inner/faiss_ivf_flat/nlist2048", + "build_param": { + "nlist": 2048 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist2048", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist2048" }, { "name": "faiss_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":4096}, - "file": "glove-100-inner/faiss_ivf_flat/nlist4096", + "build_param": { + "nlist": 4096 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist4096", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist4096" }, { "name": "faiss_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":8192}, - "file": "glove-100-inner/faiss_ivf_flat/nlist8192", + "build_param": { + "nlist": 8192 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist8192", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist8192" }, { "name": 
"faiss_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":16384}, - "file": "glove-100-inner/faiss_ivf_flat/nlist16384", + "build_param": { + "nlist": 16384 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist16384", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist16384" }, - - - { - "name": "faiss_ivf_pq.M2-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist8192", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, 
- {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist16384", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist16384", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist8192", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist16384", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist16384", - "search_params": [ - {"nprobe":1}, - 
{"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist1024", + "name": "faiss_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist8192", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/glove-100-inner/faiss_ivf_pq/M64-nlist1024", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist16384", + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": 
"faiss_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist16384", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/glove-100-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_pq/M64-nlist1024" }, - - { "name": "faiss_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":1024, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist1024-fp16", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist1024-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist1024-fp16" }, { "name": "faiss_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":2048, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist2048-fp16", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist2048-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + 
"nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16" }, { "name": "faiss_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":4096, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist4096-fp16", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist4096-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16" }, { "name": "faiss_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":8192, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist8192-fp16", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist8192-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16" }, { "name": "faiss_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":16384, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist16384-fp16", + "build_param": { + "nlist": 
16384, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist16384-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16" }, { "name": "faiss_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":1024, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist1024-int8", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist1024-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist1024-int8" }, { "name": "faiss_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":2048, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist2048-int8", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist2048-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + 
"nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist2048-int8" }, { "name": "faiss_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":4096, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist4096-int8", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist4096-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist4096-int8" }, { "name": "faiss_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":8192, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist8192-int8", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist8192-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist8192-int8" }, { "name": "faiss_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":16384, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist16384-int8", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist16384-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - 
{"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist16384-int8" }, { "name": "faiss_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "glove-100-inner/faiss_flat/flat", - "search_params": [{}] + "file": "index/glove-100-inner/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/glove-100-inner/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": 
"float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" }, { - "name": "ggnn.kbuild96-segment64-refine2-k10", - "algo": "ggnn", 
+ "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", "build_param": { - "k_build": 96, - "segment_size": 64, - "refine_iterations": 2, - "dataset_size": 1183514, - "k": 10 + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 }, - "file": "glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", "search_params": [ - {"tau":0.001, "block_dim":64, "sorted_size":32}, - {"tau":0.005, "block_dim":64, "sorted_size":32}, - {"tau":0.01, "block_dim":64, "sorted_size":32}, - {"tau":0.02, "block_dim":64, "sorted_size":32}, - {"tau":0.03, "block_dim":64, "sorted_size":32}, - {"tau":0.04, "block_dim":64, "sorted_size":32}, - {"tau":0.05, "block_dim":64, "sorted_size":32}, - {"tau":0.06, "block_dim":64, "sorted_size":32}, - {"tau":0.09, "block_dim":64, "sorted_size":32}, - {"tau":0.12, "block_dim":64, "sorted_size":32}, - {"tau":0.18, "block_dim":64, "sorted_size":32}, - {"tau":0.21, "block_dim":64, "sorted_size":32}, - {"tau":0.24, "block_dim":64, "sorted_size":32}, - {"tau":0.27, "block_dim":64, "sorted_size":32}, - {"tau":0.3, "block_dim":64, "sorted_size":32}, - {"tau":0.4, "block_dim":64, "sorted_size":32}, - {"tau":0.01, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.02, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.03, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.04, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.05, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.06, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.09, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.12, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.18, 
"block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.21, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.24, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.27, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.3, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.4, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.5, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32} - ] + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + 
}, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + 
"internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" 
+ }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/glove-100-inner/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 
+ }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 32 + }, + "file" : "index/glove-100-inner/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-100-inner/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 64 + }, + "file" : "index/glove-100-inner/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-100-inner/raft_cagra/dim64" } ] } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json new file mode 100644 index 0000000000..0fe7ac24df --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "glove-50-inner", + "base_file": "glove-50-inner/base.fbin", + "query_file": "glove-50-inner/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-inner/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-inner/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + 
"algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-inner/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-inner/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-inner/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-inner/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-inner/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-inner/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/glove-50-inner/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/glove-50-inner/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": 
"index/glove-50-inner/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + 
}, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/glove-50-inner/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/glove-50-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + 
"nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": 
"index/glove-50-inner/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + 
"quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/glove-50-inner/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/glove-50-inner/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + 
"internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + 
"ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": 
"index/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": 
"index/glove-50-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": 
"index/glove-50-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/glove-50-inner/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 32 + }, + "file" : "index/glove-50-inner/raft_cagra/dim32", + "search_params" : [ 
+ {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-50-inner/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 64 + }, + "file" : "index/glove-50-inner/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-50-inner/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json index 2a493edeed..343deb8927 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json @@ -3,6 +3,7 @@ "name": "mnist-784-euclidean", "base_file": "mnist-784-euclidean/base.fbin", "query_file": "mnist-784-euclidean/query.fbin", + "groundtruth_neighbors_file": "mnist-784-euclidean/groundtruth.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json index 630b700ba5..e94a9969d9 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json @@ -3,6 +3,7 @@ "name": "nytimes-256-angular", "base_file": "nytimes-256-angular/base.fbin", "query_file": "nytimes-256-angular/query.fbin", + "groundtruth_neighbors_file": "nytimes-256-angular/groundtruth.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-inner.json new file mode 100644 index 0000000000..f849abad35 --- /dev/null +++ 
b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-inner.json @@ -0,0 +1,1352 @@ +{ + "dataset": { + "name": "nytimes-256-inner", + "base_file": "nytimes-256-inner/base.fbin", + "query_file": "nytimes-256-inner/query.fbin", + "groundtruth_neighbors_file": "nytimes-256-inner/groundtruth.neighbors.ibin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-inner/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-inner/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-inner/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-inner/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-inner/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, 
"numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-inner/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-inner/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-inner/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/nytimes-256-inner/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/nytimes-256-inner/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + 
}, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + 
"usePrecomputed": false + }, + "file": "index/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + 
"nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": 
"faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/nytimes-256-inner/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/nytimes-256-inner/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": 
"index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": 
"result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": 
"result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": 
"result/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": 
"result/nytimes-256-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": 
"result/nytimes-256-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/nytimes-256-inner/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 32 + }, + "file" : "index/nytimes-256-inner/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/nytimes-256-inner/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 64 + }, + "file" : "index/nytimes-256-inner/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/nytimes-256-inner/raft_cagra/dim64" + } + ] +} From dfde3b499a403af433e1e27b9174461ec086adc9 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 25 Sep 2023 10:02:07 -0700 Subject: [PATCH 7/9] Fixes for OOM during CAGRA benchmarks (#1832) Running the CAGRA benchmarks and there could be OOM errors on 
GPU memory with large datasets. This is caused by holding multiple copies of the dataset in GPU memory. Fix by: * Free existing memory for the dataset/graph before allocating new memory during update_dataset/update_graph * On deserialize, if the serialized index doesn't contain the dataset - don't allocate GPU memory for it * Don't call update_dataset repeatedly in the benchmarking code with the same dataset Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1832 --- cpp/include/raft/neighbors/cagra_types.hpp | 19 +++++++++++++++---- .../detail/cagra/cagra_serialize.cuh | 19 +++++++++++++------ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 02e3f5338e..4728178194 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -165,9 +165,10 @@ struct index : ann::index { ~index() = default; /** Construct an empty index.
*/ - index(raft::resources const& res) + index(raft::resources const& res, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded) : ann::index(), - metric_(raft::distance::DistanceType::L2Expanded), + metric_(metric), dataset_(make_device_matrix(res, 0, 0)), graph_(make_device_matrix(res, 0, 0)) { @@ -296,7 +297,11 @@ struct index : ann::index { raft::host_matrix_view knn_graph) { RAFT_LOG_DEBUG("Copying CAGRA knn graph from host to device"); - graph_ = make_device_matrix(res, knn_graph.extent(0), knn_graph.extent(1)); + if ((graph_.extent(0) != knn_graph.extent(0)) || (graph_.extent(1) != knn_graph.extent(1))) { + // clear existing memory before allocating to prevent OOM errors on large graphs + if (graph_.size()) { graph_ = make_device_matrix(res, 0, 0); } + graph_ = make_device_matrix(res, knn_graph.extent(0), knn_graph.extent(1)); + } raft::copy(graph_.data_handle(), knn_graph.data_handle(), knn_graph.size(), @@ -311,7 +316,13 @@ struct index : ann::index { mdspan, row_major, data_accessor> dataset) { size_t padded_dim = round_up_safe(dataset.extent(1) * sizeof(T), 16) / sizeof(T); - dataset_ = make_device_matrix(res, dataset.extent(0), padded_dim); + + if ((dataset_.extent(0) != dataset.extent(0)) || + (static_cast(dataset_.extent(1)) != padded_dim)) { + // clear existing memory before allocating to prevent OOM errors on large datasets + if (dataset_.size()) { dataset_ = make_device_matrix(res, 0, 0); } + dataset_ = make_device_matrix(res, dataset.extent(0), padded_dim); + } if (dataset_.extent(1) == dataset.extent(1)) { raft::copy(dataset_.data_handle(), dataset.data_handle(), diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 234911e15c..8261f637e1 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -130,15 +130,22 @@ auto deserialize(raft::resources 
const& res, std::istream& is) -> index auto graph_degree = deserialize_scalar(res, is); auto metric = deserialize_scalar(res, is); - auto dataset = raft::make_host_matrix(n_rows, dim); - auto graph = raft::make_host_matrix(n_rows, graph_degree); + auto graph = raft::make_host_matrix(n_rows, graph_degree); deserialize_mdspan(res, is, graph.view()); bool has_dataset = deserialize_scalar(res, is); - if (has_dataset) { deserialize_mdspan(res, is, dataset.view()); } - - return index( - res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view())); + if (has_dataset) { + auto dataset = raft::make_host_matrix(n_rows, dim); + deserialize_mdspan(res, is, dataset.view()); + return index( + res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view())); + } else { + // create a new index with no dataset - the user must supply via update_dataset themselves + // later (this avoids allocating GPU memory in the meantime) + index idx(res, metric); + idx.update_graph(res, raft::make_const_mdspan(graph.view())); + return idx; + } } template From 8522a143ba9fd25d1054b93edc308d219043751b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 25 Sep 2023 14:25:23 -0500 Subject: [PATCH 8/9] Fix conf file for benchmarking glove datasets (#1846) Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1846 --- .../run/conf/glove-100-angular.json | 126 +++++++++--------- .../run/conf/glove-100-inner.json | 126 +++++++++--------- .../run/conf/glove-50-angular.json | 126 +++++++++--------- .../run/conf/glove-50-inner.json | 126 +++++++++--------- 4 files changed, 252 insertions(+), 252 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json index 526aef2db0..3595084d19 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json @@ -735,37 +735,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half" } @@ -785,55 +785,55 @@ "search_params": [ { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 5, + "nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - 
"numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } @@ -853,37 +853,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -903,37 +903,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -953,37 +953,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, 
"internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1003,37 +1003,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -1053,37 +1053,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1103,37 +1103,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": 
"fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1153,37 +1153,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float" } @@ -1203,37 +1203,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } diff --git 
a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json index 54c8bf908c..8b9f1cfb35 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json @@ -735,37 +735,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half" } @@ -785,55 +785,55 @@ "search_params": [ { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 5, + "nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", 
"smemLutDtype": "float" } @@ -853,37 +853,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -903,37 +903,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -953,37 +953,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + 
"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1003,37 +1003,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -1053,37 +1053,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1103,37 +1103,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", 
"smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1153,37 +1153,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float" } @@ -1203,37 +1203,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json index 9b3f192c9f..0f02620cb2 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json @@ -735,37 +735,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", 
"smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half" } @@ -785,55 +785,55 @@ "search_params": [ { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 5, + "nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } @@ -853,37 +853,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, 
"internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -903,37 +903,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -953,37 +953,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1003,37 +1003,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, 
{ "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -1053,37 +1053,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1103,37 +1103,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1153,37 +1153,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, 
"internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float" } @@ -1203,37 +1203,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json index 0fe7ac24df..41dec5adb3 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json @@ -735,37 +735,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { 
"k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half" } @@ -785,55 +785,55 @@ "search_params": [ { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 5, + "nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } @@ -853,37 +853,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -903,37 +903,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, 
"internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -953,37 +953,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1003,37 +1003,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -1053,37 +1053,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": 
"fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1103,37 +1103,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1153,37 +1153,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float" } @@ -1203,37 +1203,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 
10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } From cb24d998771a72ea6bad12a65cfb4aaf6ab0122e Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Tue, 26 Sep 2023 04:09:43 +0800 Subject: [PATCH 9/9] [FEA] Add pre-filtering to CAGRA (#1811) This PR adds the pre-filtering feature to the CAGRA search implementations. Rel: taken over from https://github.com/rapidsai/raft/pull/1765 ## Algorithm The pre-filtering algorithm removes a node that should not be in the final result after it has behaved as a parent node. This way, the nodes that should not be in the final result are also used in the graph traversal, avoiding potential performance degradation. ## Changes - Add filtering operation on a parent node after internal top-M buffer candidate calculation. - Add filtering operation to result buffer before storing them in the device memory. Authors: - tsuki (https://github.com/enp1s0) - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Micka (https://github.com/lowener) - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1811 --- cpp/include/raft/neighbors/cagra.cuh | 75 +++++- .../neighbors/detail/cagra/cagra_search.cuh | 60 ++++- .../detail/cagra/compute_distance.hpp | 13 +- .../raft/neighbors/detail/cagra/factory.cuh | 42 ++-- .../detail/cagra/search_multi_cta.cuh | 80 +++---- .../cagra/search_multi_cta_kernel-ext.cuh | 94 ++++---- .../cagra/search_multi_cta_kernel-inl.cuh | 89 ++++++-- .../detail/cagra/search_multi_kernel.cuh | 215 +++++++++++++----- .../neighbors/detail/cagra/search_plan.cuh | 9 +- .../detail/cagra/search_single_cta.cuh | 70 +++--- .../cagra/search_single_cta_kernel-ext.cuh | 96 ++++---- .../cagra/search_single_cta_kernel-inl.cuh | 214 ++++++++++++++--- cpp/include/raft/neighbors/ivf_flat-inl.cuh | 12 +- cpp/include/raft/neighbors/ivf_pq-inl.cuh | 12 +- .../raft/neighbors/sample_filter_types.hpp | 12 + .../cagra/search_multi_cta_00_generate.py | 58 ++--- ...arch_multi_cta_float_uint32_dim1024_t32.cu | 55 +++-- ...search_multi_cta_float_uint32_dim128_t8.cu | 55 +++-- ...earch_multi_cta_float_uint32_dim256_t16.cu | 55 +++-- ...earch_multi_cta_float_uint32_dim512_t32.cu | 55 +++-- ...arch_multi_cta_float_uint64_dim1024_t32.cu | 55 +++-- ...search_multi_cta_float_uint64_dim128_t8.cu | 55 +++-- ...earch_multi_cta_float_uint64_dim256_t16.cu | 55 +++-- ...earch_multi_cta_float_uint64_dim512_t32.cu | 55 +++-- ...earch_multi_cta_int8_uint32_dim1024_t32.cu | 55 +++-- .../search_multi_cta_int8_uint32_dim128_t8.cu | 55 +++-- ...search_multi_cta_int8_uint32_dim256_t16.cu | 55 +++-- ...search_multi_cta_int8_uint32_dim512_t32.cu | 55 +++-- ...arch_multi_cta_uint8_uint32_dim1024_t32.cu | 55 +++-- ...search_multi_cta_uint8_uint32_dim128_t8.cu | 55 +++-- ...earch_multi_cta_uint8_uint32_dim256_t16.cu | 55 +++-- ...earch_multi_cta_uint8_uint32_dim512_t32.cu | 55 +++-- .../cagra/search_single_cta_00_generate.py | 59 ++--- ...rch_single_cta_float_uint32_dim1024_t32.cu | 58 ++--- 
...earch_single_cta_float_uint32_dim128_t8.cu | 58 ++--- ...arch_single_cta_float_uint32_dim256_t16.cu | 58 ++--- ...arch_single_cta_float_uint32_dim512_t32.cu | 58 ++--- ...rch_single_cta_float_uint64_dim1024_t32.cu | 58 ++--- ...earch_single_cta_float_uint64_dim128_t8.cu | 58 ++--- ...arch_single_cta_float_uint64_dim256_t16.cu | 58 ++--- ...arch_single_cta_float_uint64_dim512_t32.cu | 58 ++--- ...arch_single_cta_int8_uint32_dim1024_t32.cu | 58 ++--- ...search_single_cta_int8_uint32_dim128_t8.cu | 58 ++--- ...earch_single_cta_int8_uint32_dim256_t16.cu | 58 ++--- ...earch_single_cta_int8_uint32_dim512_t32.cu | 58 ++--- ...rch_single_cta_uint8_uint32_dim1024_t32.cu | 58 ++--- ...earch_single_cta_uint8_uint32_dim128_t8.cu | 58 ++--- ...arch_single_cta_uint8_uint32_dim256_t16.cu | 58 ++--- ...arch_single_cta_uint8_uint32_dim512_t32.cu | 58 ++--- cpp/test/neighbors/ann_cagra.cuh | 177 +++++++++++++- .../ann_cagra/search_kernel_uint64_t.cuh | 200 ++++++++-------- .../neighbors/ann_cagra/test_float_int64_t.cu | 4 +- .../ann_cagra/test_float_uint32_t.cu | 8 +- .../ann_cagra/test_int8_t_uint32_t.cu | 7 +- .../ann_cagra/test_uint8_t_uint32_t.cu | 8 +- 55 files changed, 2142 insertions(+), 1280 deletions(-) diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh index 903d0571dc..1bd7010c83 100644 --- a/cpp/include/raft/neighbors/cagra.cuh +++ b/cpp/include/raft/neighbors/cagra.cuh @@ -54,14 +54,14 @@ namespace raft::neighbors::cagra { * // use default index parameters * cagra::index_params build_params; * cagra::search_params search_params - * auto knn_graph = raft::make_host_matrix(dataset.extent(0), 128); + * auto knn_graph = raft::make_host_matrix(dataset.extent(0), 128); * // create knn graph * cagra::build_knn_graph(res, dataset, knn_graph.view(), 2, build_params, search_params); - * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); + * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); * 
cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph * auto index = cagra::index(res, build_params.metric(), dataset, - * optimized_graph.view()); + * optimized_graph.view()); * @endcode * * @tparam DataT data element type @@ -106,7 +106,7 @@ void build_knn_graph(raft::resources const& res, * @code{.cpp} * using namespace raft::neighbors; * cagra::index_params build_params; - * auto knn_graph = raft::make_host_matrix(dataset.extent(0), 128); + * auto knn_graph = raft::make_host_matrix(dataset.extent(0), 128); * // build KNN graph not using `cagra::build_knn_graph` * // build(knn_graph, dataset, ...); * // sort graph index @@ -115,7 +115,7 @@ void build_knn_graph(raft::resources const& res, * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph * auto index = cagra::index(res, build_params.metric(), dataset, - * optimized_graph.view()); + * optimized_graph.view()); * @endcode * * @tparam DataT type of the data in the source dataset @@ -316,9 +316,70 @@ void search(raft::resources const& res, auto distances_internal = raft::make_device_matrix_view( distances.data_handle(), distances.extent(0), distances.extent(1)); - cagra::detail::search_main( - res, params, idx, queries_internal, neighbors_internal, distances_internal); + cagra::detail::search_main(res, + params, + idx, + queries_internal, + neighbors_internal, + distances_internal, + raft::neighbors::filtering::none_cagra_sample_filter()); } + +/** + * @brief Search ANN using the constructed index with the given sample filter. + * + * See the [cagra::build](#cagra::build) documentation for a usage example. 
+ * + * @tparam T data element type + * @tparam IdxT type of the indices + * @tparam CagraSampleFilterT Device filter function, with the signature + * `(uint32_t query ix, uint32_t sample_ix) -> bool` + * + * @param[in] res raft resources + * @param[in] params configure the search + * @param[in] idx cagra index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + * @param[in] sample_filter a device filter function that greenlights samples for a given query + */ +template +void search_with_filtering(raft::resources const& res, + const search_params& params, + const index& idx, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) +{ + RAFT_EXPECTS( + queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), + "Number of rows in output neighbors and distances matrices must equal the number of queries."); + + RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), + "Number of columns in output neighbors and distances matrices must equal k"); + RAFT_EXPECTS(queries.extent(1) == idx.dim(), + "Number of query dimensions should equal number of dimensions in the index."); + + using internal_IdxT = typename std::make_unsigned::type; + auto queries_internal = raft::make_device_matrix_view( + queries.data_handle(), queries.extent(0), queries.extent(1)); + auto neighbors_internal = raft::make_device_matrix_view( + reinterpret_cast(neighbors.data_handle()), + neighbors.extent(0), + neighbors.extent(1)); + auto distances_internal = raft::make_device_matrix_view( + distances.data_handle(), distances.extent(0), distances.extent(1)); + + cagra::detail::search_main( + res, 
params, idx, queries_internal, neighbors_internal, distances_internal, sample_filter); +} + /** @} */ // end group cagra } // namespace raft::neighbors::cagra diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index b484fa55f9..81e714dc4e 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -34,6 +35,48 @@ namespace raft::neighbors::cagra::detail { +template +struct CagraSampleFilterWithQueryIdOffset { + const uint32_t offset; + CagraSampleFilterT filter; + + CagraSampleFilterWithQueryIdOffset(const uint32_t offset, const CagraSampleFilterT filter) + : offset(offset), filter(filter) + { + } + + _RAFT_DEVICE auto operator()(const uint32_t query_id, const uint32_t sample_id) + { + return filter(query_id + offset, sample_id); + } +}; + +template +struct CagraSampleFilterT_Selector { + using type = CagraSampleFilterWithQueryIdOffset; +}; +template <> +struct CagraSampleFilterT_Selector { + using type = raft::neighbors::filtering::none_cagra_sample_filter; +}; + +// A helper function to set a query id offset +template +inline typename CagraSampleFilterT_Selector::type set_offset( + CagraSampleFilterT filter, const uint32_t offset) +{ + typename CagraSampleFilterT_Selector::type new_filter(offset, filter); + return new_filter; +} +template <> +inline + typename CagraSampleFilterT_Selector::type + set_offset( + raft::neighbors::filtering::none_cagra_sample_filter filter, const uint32_t) +{ + return filter; +} + /** * @brief Search ANN using the constructed index. 
* @@ -54,13 +97,18 @@ namespace raft::neighbors::cagra::detail { * k] */ -template +template void search_main(raft::resources const& res, search_params params, const index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, - raft::device_matrix_view distances) + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) { resource::detail::warn_non_pool_workspace(res, "raft::neighbors::cagra::search"); RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", @@ -77,8 +125,9 @@ void search_main(raft::resources const& res, common::nvtx::range fun_scope( "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); - std::unique_ptr> plan = - factory::create( + using CagraSampleFilterT_s = typename CagraSampleFilterT_Selector::type; + std::unique_ptr> plan = + factory::create( res, params, index.dim(), index.graph_degree(), topk); plan->check(neighbors.extent(1)); @@ -119,7 +168,8 @@ void search_main(raft::resources const& res, n_queries, _seed_ptr, _num_executed_iterations, - topk); + topk, + set_offset(sample_filter, qid)); } static_assert(std::is_same_v, diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 47e976e252..624c1a35d6 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -155,17 +155,20 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in INDEX_T* const visited_hashmap_ptr, const std::uint32_t hash_bitlen, const INDEX_T* const parent_indices, + const INDEX_T* const internal_topk_list, const std::uint32_t search_width) { - const INDEX_T invalid_index = utils::get_max_value(); + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); // Read child indices of parents from knn graph and 
check if the distance // computaiton is necessary. for (uint32_t i = threadIdx.x; i < knn_k * search_width; i += BLOCK_SIZE) { - const INDEX_T parent_id = parent_indices[i / knn_k]; - INDEX_T child_id = invalid_index; - if (parent_id != invalid_index) { - child_id = knn_graph[(i % knn_k) + ((uint64_t)knn_k * parent_id)]; + const INDEX_T smem_parent_id = parent_indices[i / knn_k]; + INDEX_T child_id = invalid_index; + if (smem_parent_id != invalid_index) { + const auto parent_id = internal_topk_list[smem_parent_id] & ~index_msb_1_mask; + child_id = knn_graph[(i % knn_k) + ((uint64_t)knn_k * parent_id)]; } if (child_id != invalid_index) { if (hashmap::insert(visited_hashmap_ptr, hash_bitlen, child_id) == 0) { diff --git a/cpp/include/raft/neighbors/detail/cagra/factory.cuh b/cpp/include/raft/neighbors/detail/cagra/factory.cuh index 625040194b..78111a9310 100644 --- a/cpp/include/raft/neighbors/detail/cagra/factory.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/factory.cuh @@ -20,20 +20,25 @@ #include "search_multi_kernel.cuh" #include "search_plan.cuh" #include "search_single_cta.cuh" +#include namespace raft::neighbors::cagra::detail { -template +template class factory { public: /** * Create a search structure for dataset with dim features. 
*/ - static std::unique_ptr> create(raft::resources const& res, - search_params const& params, - int64_t dim, - int64_t graph_degree, - uint32_t topk) + static std::unique_ptr> create( + raft::resources const& res, + search_params const& params, + int64_t dim, + int64_t graph_degree, + uint32_t topk) { search_plan_impl_base plan(params, dim, graph_degree, topk); switch (plan.max_dim) { @@ -63,26 +68,29 @@ class factory { break; default: RAFT_LOG_DEBUG("Incorrect max_dim (%lu)\n", plan.max_dim); } - return std::unique_ptr>(); + return std::unique_ptr>(); } private: template - static std::unique_ptr> dispatch_kernel( + static std::unique_ptr> dispatch_kernel( raft::resources const& res, search_plan_impl_base& plan) { if (plan.algo == search_algo::SINGLE_CTA) { - return std::unique_ptr>( - new single_cta_search::search( - res, plan, plan.dim, plan.graph_degree, plan.topk)); + return std::unique_ptr>( + new single_cta_search:: + search( + res, plan, plan.dim, plan.graph_degree, plan.topk)); } else if (plan.algo == search_algo::MULTI_CTA) { - return std::unique_ptr>( - new multi_cta_search::search( - res, plan, plan.dim, plan.graph_degree, plan.topk)); + return std::unique_ptr>( + new multi_cta_search:: + search( + res, plan, plan.dim, plan.graph_degree, plan.topk)); } else { - return std::unique_ptr>( - new multi_kernel_search::search( - res, plan, plan.dim, plan.graph_degree, plan.topk)); + return std::unique_ptr>( + new multi_kernel_search:: + search( + res, plan, plan.dim, plan.graph_degree, plan.topk)); } } }; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 6ea1e34032..9a722a6dfe 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -48,42 +48,43 @@ template - -struct search : public search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - 
using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::max_dim; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + typename DISTANCE_T, + typename SAMPLE_FILTER_T> + +struct search : public search_plan_impl { + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::max_dim; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; + + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using 
search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_cta_per_query; rmm::device_uvector intermediate_indices; @@ -96,7 +97,8 @@ struct search : public search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl(res, params, dim, graph_degree, topk), + : search_plan_impl( + res, params, dim, graph_degree, topk), intermediate_indices(0, resource::get_cuda_stream(res)), intermediate_distances(0, resource::get_cuda_stream(res)), topk_workspace(0, resource::get_cuda_stream(res)) @@ -196,7 +198,8 @@ struct search : public search_plan_impl { const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] - uint32_t topk) + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); @@ -223,6 +226,7 @@ struct search : public search_plan_impl { search_width, min_iterations, max_iterations, + sample_filter, stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index de83acbb64..ee525587d7 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -15,7 +15,8 @@ */ #pragma once -#include // RAFT_EXPLICIT +#include // none_cagra_sample_filter +#include // RAFT_EXPLICIT namespace raft::neighbors::cagra::detail { namespace multi_cta_search { @@ -26,7 +27,8 @@ template + class DISTANCE_T, + class SAMPLE_FILTER_T> void select_and_run(raft::device_matrix_view dataset, raft::device_matrix_view graph, 
INDEX_T* const topk_indices_ptr, @@ -49,47 +51,63 @@ void select_and_run(raft::device_matrix_view( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, float, uint32_t, float); -instantiate_kernel_selection(8, 128, float, uint32_t, float); -instantiate_kernel_selection(16, 256, float, uint32_t, float); -instantiate_kernel_selection(32, 512, float, uint32_t, float); -instantiate_kernel_selection(32, 1024, int8_t, uint32_t, float); 
-instantiate_kernel_selection(8, 128, int8_t, uint32_t, float); -instantiate_kernel_selection(16, 256, int8_t, uint32_t, float); -instantiate_kernel_selection(32, 512, int8_t, uint32_t, float); -instantiate_kernel_selection(32, 1024, uint8_t, uint32_t, float); -instantiate_kernel_selection(8, 128, uint8_t, uint32_t, float); -instantiate_kernel_selection(16, 256, uint8_t, uint32_t, float); -instantiate_kernel_selection(32, 512, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection } // namespace multi_cta_search diff 
--git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 4fc051ac09..8bfbc48898 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -75,7 +76,7 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [sea if (new_parent) { const auto i = __popc(ballot_mask & ((1 << lane_id) - 1)) + num_new_parents; if (i < search_width) { - next_parent_indices[i] = index; + next_parent_indices[i] = j; itopk_indices[j] |= index_msb_1_mask; // set most significant bit as used node } } @@ -131,7 +132,8 @@ template + class LOAD_T, + class SAMPLE_FILTER_T> __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( INDEX_T* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] DISTANCE_T* const result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] @@ -152,8 +154,8 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( const uint32_t search_width, const uint32_t min_iteration, const uint32_t max_iteration, - uint32_t* const num_executed_iterations /* stats */ -) + uint32_t* const num_executed_iterations, /* stats */ + SAMPLE_FILTER_T sample_filter) { assert(blockDim.x == BLOCK_SIZE); assert(dataset_dim <= MAX_DATASET_DIM); @@ -287,13 +289,57 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( local_visited_hashmap_ptr, hash_bitlen, parent_indices_buffer, + result_indices_buffer, search_width); _CLK_REC(clk_compute_distance); __syncthreads(); + // Filtering + if constexpr (!std::is_same::value) { + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); + + for (unsigned p = threadIdx.x; p < search_width; p += blockDim.x) { 
+ if (parent_indices_buffer[p] != invalid_index) { + const auto parent_id = + result_indices_buffer[parent_indices_buffer[p]] & ~index_msb_1_mask; + if (!sample_filter(query_id, parent_id)) { + // If the parent must not be in the resulting top-k list, remove from the parent list + result_distances_buffer[parent_indices_buffer[p]] = utils::get_max_value(); + result_indices_buffer[parent_indices_buffer[p]] = invalid_index; + } + } + } + __syncthreads(); + } + iter++; } + // Post process for filtering + if constexpr (!std::is_same::value) { + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); + + for (unsigned i = threadIdx.x; i < itopk_size + search_width * graph_degree; i += blockDim.x) { + const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask; + if (node_id != (invalid_index & ~index_msb_1_mask) && !sample_filter(query_id, node_id)) { + // If the parent must not be in the resulting top-k list, remove from the parent list + result_distances_buffer[i] = utils::get_max_value(); + result_indices_buffer[i] = invalid_index; + } + } + + __syncthreads(); + topk_by_bitonic_sort(result_distances_buffer, + result_indices_buffer, + itopk_size + (search_width * graph_degree), + itopk_size); + __syncthreads(); + } + for (uint32_t i = threadIdx.x; i < itopk_size; i += BLOCK_SIZE) { uint32_t j = i + (itopk_size * (cta_id + (num_cta_per_query * query_id))); if (result_distances_ptr != nullptr) { result_distances_ptr[j] = result_distances_buffer[i]; } @@ -361,7 +407,8 @@ template + typename DISTANCE_T, + typename SAMPLE_FILTER_T> struct search_kernel_config { // Search kernel function type. Note that the actual values for the template value // parameters do not matter, because they are not part of the function signature. 
The @@ -374,7 +421,8 @@ struct search_kernel_config { DATA_T, DISTANCE_T, INDEX_T, - device::LOAD_128BIT_T>); + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>); static auto choose_buffer_size(unsigned result_buffer_size, unsigned block_size) -> kernel_t { @@ -401,7 +449,8 @@ struct search_kernel_config { DATA_T, DISTANCE_T, INDEX_T, - device::LOAD_128BIT_T>; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } else if (block_size == 128) { return search_kernel; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } else if (block_size == 256) { return search_kernel; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } else if (block_size == 512) { return search_kernel; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } else { return search_kernel; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } } }; @@ -450,7 +503,8 @@ template + typename DISTANCE_T, + typename SAMPLE_FILTER_T> void select_and_run( // raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -475,10 +529,12 @@ void select_and_run( // raft::resources const& res, size_t search_width, size_t min_iterations, size_t max_iterations, + SAMPLE_FILTER_T sample_filter, cudaStream_t stream) { - auto kernel = search_kernel_config:: - choose_buffer_size(result_buffer_size, block_size); + auto kernel = + search_kernel_config:: + choose_buffer_size(result_buffer_size, block_size); RAFT_CUDA_TRY( cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); @@ -489,7 +545,7 @@ void select_and_run( // raft::resources const& res, dim3 block_dims(block_size, 1, 1); dim3 grid_dims(num_cta_per_query, num_queries, 1); - RAFT_LOG_DEBUG("Launching kernel with %u threads, (%u, %u) blocks %lu smem", + RAFT_LOG_DEBUG("Launching kernel with %u threads, (%u, %u) blocks %u smem", block_size, num_cta_per_query, num_queries, @@ -513,7 +569,8 @@ void select_and_run( // raft::resources const& res, search_width, min_iterations, max_iterations, - num_executed_iterations); + num_executed_iterations, + 
sample_filter); } } // namespace multi_cta_search diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index f312226f42..ff1bb969e7 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -242,7 +243,7 @@ __global__ void pickup_next_parents_kernel( if (new_parent) { const auto i = __popc(ballot_mask & ((1 << threadIdx.x) - 1)) + num_new_parents; if (i < parent_list_size) { - parent_list_ptr[i + (ldd * query_id)] = index; + parent_list_ptr[i + (ldd * query_id)] = j; parent_candidates_ptr[j + (lds * query_id)] |= index_msb_1_mask; // set most significant bit as used node } @@ -306,9 +307,13 @@ template + class DISTANCE_T, + class SAMPLE_FILTER_T> __global__ void compute_distance_to_child_nodes_kernel( const INDEX_T* const parent_node_list, // [num_queries, search_width] + INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] + DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const std::size_t lds, const std::uint32_t search_width, const DATA_T* const dataset_ptr, // [dataset_size, data_dim] const std::uint32_t data_dim, @@ -321,16 +326,25 @@ __global__ void compute_distance_to_child_nodes_kernel( const std::uint32_t hash_bitlen, INDEX_T* const result_indices_ptr, // [num_queries, ldd] DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd // (*) ldd >= search_width * graph_degree -) + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + SAMPLE_FILTER_T sample_filter) { const uint32_t ldb = hashmap::get_size(hash_bitlen); const auto tid = threadIdx.x + blockDim.x * blockIdx.x; const auto global_team_id = tid / TEAM_SIZE; + const auto query_id = blockIdx.y; + if (global_team_id >= search_width * graph_degree) { return; } - 
const std::size_t parent_index = + const std::size_t parent_list_index = parent_node_list[global_team_id / graph_degree + (search_width * blockIdx.y)]; + + if (parent_list_index == utils::get_max_value()) { return; } + + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const auto parent_index = + parent_candidates_ptr[parent_list_index + (lds * query_id)] & ~index_msb_1_mask; + if (parent_index == utils::get_max_value()) { result_distances_ptr[ldd * blockIdx.y + global_team_id] = utils::get_max_value(); return; @@ -361,15 +375,28 @@ __global__ void compute_distance_to_child_nodes_kernel( result_distances_ptr[ldd * blockIdx.y + global_team_id] = utils::get_max_value(); } } + + if constexpr (!std::is_same::value) { + if (!sample_filter(query_id, parent_index)) { + parent_candidates_ptr[parent_list_index + (lds * query_id)] = utils::get_max_value(); + parent_distance_ptr[parent_list_index + (lds * query_id)] = + utils::get_max_value(); + } + } } template + class DISTANCE_T, + class SAMPLE_FILTER_T> void compute_distance_to_child_nodes( const INDEX_T* const parent_node_list, // [num_queries, search_width] + INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] + DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const std::size_t lds, const uint32_t search_width, const DATA_T* const dataset_ptr, // [dataset_size, data_dim] const std::uint32_t data_dim, @@ -384,6 +411,7 @@ void compute_distance_to_child_nodes( INDEX_T* const result_indices_ptr, // [num_queries, ldd] DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream = 0) { const auto block_size = 128; @@ -392,6 +420,9 @@ void compute_distance_to_child_nodes( num_queries); compute_distance_to_child_nodes_kernel <<>>(parent_node_list, + parent_candidates_ptr, + parent_distance_ptr, + lds, search_width, dataset_ptr, data_dim, @@ 
-404,7 +435,8 @@ void compute_distance_to_child_nodes( hash_bitlen, result_indices_ptr, result_distances_ptr, - ldd); + ldd, + sample_filter); } template @@ -436,6 +468,50 @@ void remove_parent_bit(const std::uint32_t num_queries, num_queries, num_topk, topk_indices_ptr, ld); } +// This function called after the `remove_parent_bit` function +template +__global__ void apply_filter_kernel(INDEX_T* const result_indices_ptr, + DISTANCE_T* const result_distances_ptr, + const std::size_t lds, + const std::uint32_t result_buffer_size, + const std::uint32_t num_queries, + const INDEX_T query_id_offset, + SAMPLE_FILTER_T sample_filter) +{ + const auto tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= result_buffer_size * num_queries) { return; } + const auto i = tid % result_buffer_size; + const auto j = tid / result_buffer_size; + const auto index = i + j * lds; + + if (!sample_filter(query_id_offset + j, result_indices_ptr[index])) { + result_indices_ptr[index] = utils::get_max_value(); + result_distances_ptr[index] = utils::get_max_value(); + } +} + +template +void apply_filter(INDEX_T* const result_indices_ptr, + DISTANCE_T* const result_distances_ptr, + const std::size_t lds, + const std::uint32_t result_buffer_size, + const std::uint32_t num_queries, + const INDEX_T query_id_offset, + SAMPLE_FILTER_T sample_filter, + cudaStream_t cuda_stream) +{ + const std::uint32_t block_size = 256; + const std::uint32_t grid_size = ceildiv(num_queries * result_buffer_size, block_size); + + apply_filter_kernel<<>>(result_indices_ptr, + result_distances_ptr, + lds, + result_buffer_size, + num_queries, + query_id_offset, + sample_filter); +} + template __global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_dst] const uint64_t ld_dst, @@ -508,41 +584,42 @@ template -struct search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using 
search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::max_dim; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + typename DISTANCE_T, + typename SAMPLE_FILTER_T> +struct search : search_plan_impl { + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::max_dim; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; + + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using 
search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; size_t result_buffer_allocation_size; rmm::device_uvector result_indices; // results_indices_buffer @@ -557,7 +634,8 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl(res, params, dim, graph_degree, topk), + : search_plan_impl( + res, params, dim, graph_degree, topk), result_indices(0, resource::get_cuda_stream(res)), result_distances(0, resource::get_cuda_stream(res)), parent_node_list(0, resource::get_cuda_stream(res)), @@ -602,7 +680,8 @@ struct search : search_plan_impl { const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] - uint32_t topk) + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { // Init hashmap cudaStream_t stream = resource::get_cuda_stream(res); @@ -684,6 +763,9 @@ struct search : search_plan_impl { // Compute distance to child nodes that are adjacent to the parent node compute_distance_to_child_nodes( parent_node_list.data(), + result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_buffer_allocation_size, search_width, dataset.data_handle(), dataset.extent(1), @@ -698,22 +780,53 @@ struct search : search_plan_impl { result_indices.data() + itopk_size, result_distances.data() + itopk_size, result_buffer_allocation_size, + sample_filter, stream); iter++; } // while ( 1 ) + auto result_indices_ptr = result_indices.data() + (iter & 0x1) * result_buffer_size; + auto result_distances_ptr = result_distances.data() + (iter & 0x1) * result_buffer_size; // Remove parent bit in search results - remove_parent_bit(num_queries, - itopk_size, - result_indices.data() + (iter & 0x1) * 
result_buffer_size, - result_buffer_allocation_size, - stream); + remove_parent_bit( + num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream); + + if (!std::is_same::value) { + apply_filter( + result_indices.data() + (iter & 0x1) * itopk_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_buffer_size, + num_queries, + 0, + sample_filter, + stream); + + result_indices_ptr = result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size; + result_distances_ptr = result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size; + _cuann_find_topk(itopk_size, + num_queries, + result_buffer_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_distances_ptr, + result_buffer_allocation_size, + result_indices_ptr, + result_buffer_allocation_size, + topk_workspace.data(), + true, + topk_hint.data(), + stream); + } // Copy results from working buffer to final buffer batched_memcpy(topk_indices_ptr, topk, - result_indices.data() + (iter & 0x1) * result_buffer_size, + result_indices_ptr, result_buffer_allocation_size, topk, num_queries, @@ -721,7 +834,7 @@ struct search : search_plan_impl { if (topk_distances_ptr) { batched_memcpy(topk_distances_ptr, topk, - result_distances.data() + (iter & 0x1) * result_buffer_size, + result_distances_ptr, result_buffer_allocation_size, topk, num_queries, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 33c77db61e..9419385836 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -65,7 +65,7 @@ struct search_plan_impl_base : public search_params { } }; -template +template struct search_plan_impl : public search_plan_impl_base { int64_t hash_bitlen; @@ -113,7 +113,8 @@ 
struct search_plan_impl : public search_plan_impl_base { const std::uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] - uint32_t topk){}; + uint32_t topk, + SAMPLE_FILTER_T sample_filter){}; void adjust_search_params() { @@ -129,13 +130,13 @@ struct search_plan_impl : public search_plan_impl_base { if (max_iterations < min_iterations) { _max_iterations = min_iterations; } if (max_iterations < _max_iterations) { RAFT_LOG_DEBUG( - "# max_iterations is increased from %u to %u.", max_iterations, _max_iterations); + "# max_iterations is increased from %lu to %u.", max_iterations, _max_iterations); max_iterations = _max_iterations; } if (itopk_size % 32) { uint32_t itopk32 = itopk_size; itopk32 += 32 - (itopk_size % 32); - RAFT_LOG_DEBUG("# internal_topk is increased from %u to %u, as it must be multiple of 32.", + RAFT_LOG_DEBUG("# internal_topk is increased from %lu to %u, as it must be multiple of 32.", itopk_size, itopk32); itopk_size = itopk32; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index 45dd535e1d..27d54f72cb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -49,41 +49,42 @@ template -struct search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; + typename DISTANCE_T, + typename 
SAMPLE_FILTER_T> +struct search : search_plan_impl { + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; - using search_plan_impl::max_dim; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; + using search_plan_impl::max_dim; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; - using search_plan_impl::hash_bitlen; + using search_plan_impl::hash_bitlen; - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; - using search_plan_impl::smem_size; + using search_plan_impl::smem_size; - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_itopk_candidates; @@ -92,7 +93,8 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl(res, params, dim, graph_degree, topk) + : 
search_plan_impl( + res, params, dim, graph_degree, topk) { set_params(res); } @@ -111,7 +113,7 @@ struct search : search_plan_impl { RAFT_EXPECTS(itopk_size <= max_itopk, "itopk_size cannot be larger than %u", max_itopk); RAFT_LOG_DEBUG("# num_itopk_candidates: %u", num_itopk_candidates); - RAFT_LOG_DEBUG("# num_itopk: %u", itopk_size); + RAFT_LOG_DEBUG("# num_itopk: %lu", itopk_size); // // Determine the thread block size // @@ -234,7 +236,8 @@ struct search : search_plan_impl { const std::uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] - uint32_t topk) + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); select_and_run( @@ -261,6 +264,7 @@ struct search : search_plan_impl { search_width, min_iterations, max_iterations, + sample_filter, stream); } }; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index 5f5df1a818..35d239563a 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -15,7 +15,9 @@ */ #pragma once +#include #include // RAFT_EXPLICIT + namespace raft::neighbors::cagra::detail { namespace single_cta_search { @@ -25,7 +27,8 @@ template + typename DISTANCE_T, + typename SAMPLE_FILTER_T> void select_and_run( // raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -50,50 +53,65 @@ void select_and_run( // raft::resources const& res, size_t search_width, size_t min_iterations, size_t max_iterations, + SAMPLE_FILTER_T sample_filter, cudaStream_t stream) RAFT_EXPLICIT; #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - extern template void 
select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, float, uint32_t, float); -instantiate_single_cta_select_and_run(8, 128, float, uint32_t, float); -instantiate_single_cta_select_and_run(16, 256, float, uint32_t, float); -instantiate_single_cta_select_and_run(32, 512, float, uint32_t, float); 
-instantiate_single_cta_select_and_run(32, 1024, int8_t, uint32_t, float); -instantiate_single_cta_select_and_run(8, 128, int8_t, uint32_t, float); -instantiate_single_cta_select_and_run(16, 256, int8_t, uint32_t, float); -instantiate_single_cta_select_and_run(32, 512, int8_t, uint32_t, float); -instantiate_single_cta_select_and_run(32, 1024, uint8_t, uint32_t, float); -instantiate_single_cta_select_and_run(8, 128, uint8_t, uint32_t, float); -instantiate_single_cta_select_and_run(16, 256, uint8_t, uint32_t, float); -instantiate_single_cta_select_and_run(32, 512, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 16, 256, uint8_t, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_select_and_run diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 81325fd5da..128dc8d116 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ __device__ void pickup_next_parents(std::uint32_t* const terminate_flag, if (new_parent) { const auto i = __popc(ballot_mask & ((1 << threadIdx.x) - 1)) + num_new_parents; if (i < search_width) { - next_parent_indices[i] = index; + next_parent_indices[i] = jj; // set most significant bit as used node internal_topk_indices[jj] |= index_msb_1_mask; } @@ -458,7 +459,8 @@ template + class INDEX_T, + class SAMPLE_FILTER_T> __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(INDEX_T* const result_indices_ptr, // [num_queries, top_k] DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] @@ -482,7 +484,8 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ std::uint32_t* const num_executed_iterations, // [num_queries] const std::uint32_t hash_bitlen, const std::uint32_t small_hash_bitlen, - const std::uint32_t small_hash_reset_interval) + const std::uint32_t small_hash_reset_interval, + SAMPLE_FILTER_T sample_filter) { using LOAD_T = device::LOAD_128BIT_T; const auto query_id = blockIdx.y; @@ -527,6 +530,9 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ auto terminate_flag = reinterpret_cast(topk_ws + 3); auto smem_working_ptr = reinterpret_cast(terminate_flag + 1); + // A flag for filtering. 
+ auto filter_flag = terminate_flag; + const DATA_T* const query_ptr = queries_ptr + query_id * dataset_dim; for (unsigned i = threadIdx.x; i < MAX_DATASET_DIM; i += BLOCK_SIZE) { unsigned j = device::swizzling(i); @@ -576,7 +582,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ std::uint32_t iter = 0; while (1) { // sort - if (TOPK_BY_BITONIC_SORT) { + if constexpr (TOPK_BY_BITONIC_SORT) { // [Notice] // It is good to use multiple warps in topk_by_bitonic_sort() when // batch size is small (short-latency), but it might not be always good @@ -614,17 +620,28 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ // topk with bitonic sort _CLK_START(); - topk_by_bitonic_sort( - result_distances_buffer, - result_indices_buffer, - internal_topk, - result_distances_buffer + internal_topk, - result_indices_buffer + internal_topk, - search_width * graph_degree, - topk_ws, - (iter == 0)); + if (std::is_same::value || + *filter_flag == 0) { + topk_by_bitonic_sort( + result_distances_buffer, + result_indices_buffer, + internal_topk, + result_distances_buffer + internal_topk, + result_indices_buffer + internal_topk, + search_width * graph_degree, + topk_ws, + (iter == 0)); + __syncthreads(); + } else { + topk_by_bitonic_sort_1st( + result_distances_buffer, + result_indices_buffer, + internal_topk + search_width * graph_degree, + internal_topk); + if (threadIdx.x == 0) { *terminate_flag = 0; } + } _CLK_REC(clk_topk); - } else { _CLK_START(); // topk with radix block sort @@ -693,12 +710,61 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ local_visited_hashmap_ptr, hash_bitlen, parent_list_buffer, + result_indices_buffer, search_width); __syncthreads(); _CLK_REC(clk_compute_distance); + // Filtering + if constexpr (!std::is_same::value) { + if (threadIdx.x == 0) { *filter_flag = 0; } + __syncthreads(); + + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); + + for (unsigned p = 
threadIdx.x; p < search_width; p += blockDim.x) { + if (parent_list_buffer[p] != invalid_index) { + const auto parent_id = result_indices_buffer[parent_list_buffer[p]] & ~index_msb_1_mask; + if (!sample_filter(query_id, parent_id)) { + // If the parent must not be in the resulting top-k list, remove from the parent list + result_distances_buffer[parent_list_buffer[p]] = utils::get_max_value(); + result_indices_buffer[parent_list_buffer[p]] = invalid_index; + *filter_flag = 1; + } + } + } + __syncthreads(); + } + iter++; } + + // Post process for filtering + if constexpr (!std::is_same::value) { + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); + + for (unsigned i = threadIdx.x; i < internal_topk + search_width * graph_degree; + i += blockDim.x) { + const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask; + if (node_id != (invalid_index & ~index_msb_1_mask) && !sample_filter(query_id, node_id)) { + result_distances_buffer[i] = utils::get_max_value(); + result_indices_buffer[i] = invalid_index; + } + } + + __syncthreads(); + topk_by_bitonic_sort_1st( + result_distances_buffer, + result_indices_buffer, + internal_topk + search_width * graph_degree, + top_k); + __syncthreads(); + } + for (std::uint32_t i = threadIdx.x; i < top_k; i += BLOCK_SIZE) { unsigned j = i + (top_k * query_id); unsigned ii = i; @@ -737,9 +803,15 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ #endif } -template +template struct search_kernel_config { - using kernel_t = decltype(&search_kernel); + using kernel_t = + decltype(&search_kernel); template static auto choose_block_size(unsigned block_size) -> kernel_t @@ -747,24 +819,104 @@ struct search_kernel_config { constexpr unsigned BS = USE_BITONIC_SORT; if constexpr (BS) { if (block_size == 64) { - return search_kernel; + return search_kernel; } else if (block_size == 128) { - return search_kernel; + return search_kernel; } else if (block_size == 
256) { - return search_kernel; + return search_kernel; } else if (block_size == 512) { - return search_kernel; + return search_kernel; } else { - return search_kernel; + return search_kernel; } } else { if (block_size == 256) { - return search_kernel; + return search_kernel; } else if (block_size == 512) { - return search_kernel; + return search_kernel; } else { - return search_kernel; + return search_kernel; } } } @@ -826,7 +978,8 @@ template + typename DISTANCE_T, + typename SAMPLE_FILTER_T> void select_and_run( // raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -851,16 +1004,18 @@ void select_and_run( // raft::resources const& res, size_t search_width, size_t min_iterations, size_t max_iterations, + SAMPLE_FILTER_T sample_filter, cudaStream_t stream) { - auto kernel = search_kernel_config:: - choose_itopk_and_mx_candidates(itopk_size, num_itopk_candidates, block_size); + auto kernel = + search_kernel_config:: + choose_itopk_and_mx_candidates(itopk_size, num_itopk_candidates, block_size); RAFT_CUDA_TRY( cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); dim3 thread_dims(block_size, 1, 1); dim3 block_dims(1, num_queries, 1); RAFT_LOG_DEBUG( - "Launching kernel with %u threads, %u block %lu smem", block_size, num_queries, smem_size); + "Launching kernel with %u threads, %u block %u smem", block_size, num_queries, smem_size); kernel<<>>(topk_indices_ptr, topk_distances_ptr, topk, @@ -883,7 +1038,8 @@ void select_and_run( // raft::resources const& res, num_executed_iterations, hash_bitlen, small_hash_bitlen, - small_hash_reset_interval); + small_hash_reset_interval, + sample_filter); RAFT_CUDA_TRY(cudaPeekAtLastError()); } } // namespace single_cta_search diff --git a/cpp/include/raft/neighbors/ivf_flat-inl.cuh b/cpp/include/raft/neighbors/ivf_flat-inl.cuh index a18ee065bf..6641346a67 100644 --- a/cpp/include/raft/neighbors/ivf_flat-inl.cuh +++ 
b/cpp/include/raft/neighbors/ivf_flat-inl.cuh @@ -342,7 +342,7 @@ void extend(raft::resources const& handle, /** @} */ /** - * @brief Search ANN using the constructed index. + * @brief Search ANN using the constructed index with the given filter. * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. * @@ -374,6 +374,8 @@ void extend(raft::resources const& handle, * * @tparam T data element type * @tparam IdxT type of the indices + * @tparam IvfSampleFilterT Device filter function, with the signature + * `(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool` * * @param[in] handle * @param[in] params configure the search @@ -386,7 +388,7 @@ void extend(raft::resources const& handle, * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] * @param[in] mr an optional memory resource to use across the searches (you can provide a large * enough memory pool here to avoid memory allocations within search). - * @param[in] sample_filter a filter the greenlights samples for a given query + * @param[in] sample_filter a device filter function that greenlights samples for a given query */ template void search_with_filtering(raft::resources const& handle, @@ -475,7 +477,7 @@ void search(raft::resources const& handle, */ /** - * @brief Search ANN using the constructed index using the given filter. + * @brief Search ANN using the constructed index with the given filter. * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. 
* @@ -501,6 +503,8 @@ void search(raft::resources const& handle, * * @tparam T data element type * @tparam IdxT type of the indices + * @tparam IvfSampleFilterT Device filter function, with the signature + * `(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool` * * @param[in] handle * @param[in] params configure the search @@ -509,7 +513,7 @@ void search(raft::resources const& handle, * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset * [n_queries, k] * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] - * @param[in] sample_filter a filter the greenlights samples for a given query + * @param[in] sample_filter a device filter function that greenlights samples for a given query */ template void search_with_filtering(raft::resources const& handle, diff --git a/cpp/include/raft/neighbors/ivf_pq-inl.cuh b/cpp/include/raft/neighbors/ivf_pq-inl.cuh index ccf8717486..9f203d92fb 100644 --- a/cpp/include/raft/neighbors/ivf_pq-inl.cuh +++ b/cpp/include/raft/neighbors/ivf_pq-inl.cuh @@ -134,7 +134,7 @@ void extend(raft::resources const& handle, } /** - * @brief Search ANN using the constructed index using the given filter. + * @brief Search ANN using the constructed index with the given filter. * * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example. * @@ -148,6 +148,8 @@ void extend(raft::resources const& handle, * * @tparam T data element type * @tparam IdxT type of the indices + * @tparam IvfSampleFilterT Device filter function, with the signature + * `(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool` * * @param[in] handle * @param[in] params configure the search @@ -157,7 +159,7 @@ void extend(raft::resources const& handle, * [n_queries, k] * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, * k] - * @param[in] sample_filter a filter the greenlights samples for a given query. 
+ * @param[in] sample_filter a device filter function that greenlights samples for a given query. */ template void search_with_filtering(raft::resources const& handle, @@ -343,7 +345,7 @@ void extend(raft::resources const& handle, } /** - * @brief Search ANN using the constructed index using the given filter. + * @brief Search ANN using the constructed index with the given filter. * * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example. * @@ -372,6 +374,8 @@ void extend(raft::resources const& handle, * * @tparam T data element type * @tparam IdxT type of the indices + * @tparam IvfSampleFilterT Device filter function, with the signature + * `(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool` * * @param[in] handle * @param[in] params configure the search @@ -382,7 +386,7 @@ void extend(raft::resources const& handle, * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset * [n_queries, k] * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] - * @param[in] sample_filter a filter the greenlights samples for a given query + * @param[in] sample_filter a device filter function that greenlights samples for a given query */ template void search_with_filtering(raft::resources const& handle, diff --git a/cpp/include/raft/neighbors/sample_filter_types.hpp b/cpp/include/raft/neighbors/sample_filter_types.hpp index 5a301e9d2f..10c5e99372 100644 --- a/cpp/include/raft/neighbors/sample_filter_types.hpp +++ b/cpp/include/raft/neighbors/sample_filter_types.hpp @@ -37,6 +37,18 @@ struct none_ivf_sample_filter { } }; +/* A filter that filters nothing. This is the default behavior. 
*/ +struct none_cagra_sample_filter { + inline _RAFT_HOST_DEVICE bool operator()( + // query index + const uint32_t query_ix, + // the index of the current sample + const uint32_t sample_ix) const + { + return true; + } +}; + /** * If the filtering depends on the index of a sample, then the following * filter template can be used: diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index 784d116503..15eb0a9e65 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -39,41 +39,45 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \\ - template void select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t block_size, \\ - uint32_t result_buffer_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ - uint32_t num_cta_per_query, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - cudaStream_t stream); +#define instantiate_kernel_selection( \\ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ + template void \\ + select_and_run( \\ + raft::device_matrix_view dataset, \\ + raft::device_matrix_view graph, \\ + INDEX_T* const topk_indices_ptr, \\ + DISTANCE_T* const topk_distances_ptr, \\ + const DATA_T* const queries_ptr, \\ + const uint32_t num_queries, \\ + const 
INDEX_T* dev_seed_ptr, \\ + uint32_t* const num_executed_iterations, \\ + uint32_t topk, \\ + uint32_t block_size, \\ + uint32_t result_buffer_size, \\ + uint32_t smem_size, \\ + int64_t hash_bitlen, \\ + INDEX_T* hashmap_ptr, \\ + uint32_t num_cta_per_query, \\ + uint32_t num_random_samplings, \\ + uint64_t rand_xor_mask, \\ + uint32_t num_seeds, \\ + size_t itopk_size, \\ + size_t search_width, \\ + size_t min_iterations, \\ + size_t max_iterations, \\ + SAMPLE_FILTER_T sample_filter, \\ + cudaStream_t stream); """ trailer = """ #undef instantiate_kernel_selection -} // namespace raft::neighbors::cagra::detail::namespace multi_cta_search +} // namespace raft::neighbors::cagra::detail::multi_cta_search """ mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] @@ -97,7 +101,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection({team}, {mxdim}, {data_t}, {idx_t}, {distance_t});\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) # For pasting into CMakeLists.txt diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 2a4e7ac607..1a3b2284bd 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* 
const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, float, uint32_t, float); +instantiate_kernel_selection( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index 115ce3b48b..36e86d9ed6 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define
instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(8, 128, float, uint32_t, float); +instantiate_kernel_selection( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index c5e704a85f..6f1af2d93f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(16, 256, float, uint32_t, float); +instantiate_kernel_selection( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index 3469facf39..1279f8e415 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const 
num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 512, float, uint32_t, float); +instantiate_kernel_selection( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index 327bfc73b4..0dabff0df5 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + 
TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, float, uint64_t, float); +instantiate_kernel_selection( + 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index 1abe0cd8af..72bb74cdb8 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t 
block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(8, 128, float, uint64_t, float); +instantiate_kernel_selection( + 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index dd61810d06..dceea10b5d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, 
DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(16, 256, float, uint64_t, float); +instantiate_kernel_selection( + 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index 
8e12bab514..acb8bd6a12 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); 
-instantiate_kernel_selection(32, 512, float, uint64_t, float); +instantiate_kernel_selection( + 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index d946ac9c79..0254f09ff0 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + 
uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, int8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index e4d7b44d1e..2b67e7e968 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + 
template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(8, 128, int8_t, uint32_t, float); +instantiate_kernel_selection( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index b8dc3b38a8..17d6722e58 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t 
hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(16, 256, int8_t, uint32_t, float); +instantiate_kernel_selection( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index 749b35bad6..38f02812e2 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view 
dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 512, int8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index 428d460ba8..fa111196c6 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024,
uint8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index 28a20b865e..1ef3c28aa3 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t 
smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(8, 128, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index e85a84ae8e..d26cb44843 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + 
raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(16, 256, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index 232b62ebcd..4d4322f261 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, 
\ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 512, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index cf61a45b4a..249555082e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -39,35 +39,38 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \\ - template void select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const 
topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t num_itopk_candidates, \\ - uint32_t block_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ - size_t small_hash_bitlen, \\ - size_t small_hash_reset_interval, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ +#define instantiate_single_cta_select_and_run( \\ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ + template void \\ + select_and_run( \\ + raft::device_matrix_view dataset, \\ + raft::device_matrix_view graph, \\ + INDEX_T* const topk_indices_ptr, \\ + DISTANCE_T* const topk_distances_ptr, \\ + const DATA_T* const queries_ptr, \\ + const uint32_t num_queries, \\ + const INDEX_T* dev_seed_ptr, \\ + uint32_t* const num_executed_iterations, \\ + uint32_t topk, \\ + uint32_t num_itopk_candidates, \\ + uint32_t block_size, \\ + uint32_t smem_size, \\ + int64_t hash_bitlen, \\ + INDEX_T* hashmap_ptr, \\ + size_t small_hash_bitlen, \\ + size_t small_hash_reset_interval, \\ + uint32_t num_random_samplings, \\ + uint64_t rand_xor_mask, \\ + uint32_t num_seeds, \\ + size_t itopk_size, \\ + size_t search_width, \\ + size_t min_iterations, \\ + size_t max_iterations, \\ + SAMPLE_FILTER_T sample_filter, \\ cudaStream_t stream); """ @@ -75,7 +78,7 @@ trailer = """ #undef instantiate_single_cta_search_kernel -} // namespace raft::neighbors::cagra::detail::single_cta_search +} // namespace raft::neighbors::cagra::detail::single_cta_search """ mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] @@ -102,7 +105,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_single_cta_select_and_run({team}, 
{mxdim},{data_t}, {idx_t}, {distance_t});\n" + f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index eb45d4ff08..b8c23103ba 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const 
num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, float, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index 049715aa20..8ab1897119 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t 
search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(8, 128, float, uint32_t, float); +instantiate_single_cta_select_and_run( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index 6028c283db..9fd36b4cb9 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const 
topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(16, 256, float, uint32_t, float); +instantiate_single_cta_select_and_run( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index 2566e9cbd9..a9ee2c864b 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 512, float, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index 4cd96ad9c0..dadc574b65 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const 
uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, float, uint64_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index 822a2efb2f..30e043f47e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t 
rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(8, 128, float, uint64_t, float); +instantiate_single_cta_select_and_run( + 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index 80d1f76b9b..089e4c930f 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - 
raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(16, 256, float, uint64_t, float); +instantiate_single_cta_select_and_run( + 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu 
b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index 06c3eaf10b..3e8ffb8bf8 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t 
num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 512, float, uint64_t, float); +instantiate_single_cta_select_and_run( + 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index b4c30ac943..279587738e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* 
const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, int8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index c8d0df3ac4..ef127d3f7d 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - 
size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(8, 128, int8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index 19ecee91af..7fcfdcc28e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, 
DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(16, 256, int8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index 52c4eb7d6b..a6c606d99b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ 
+ uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 512, int8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index 4675e17084..0b8be56614 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + 
raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index e73e1071ee..4c193b9408 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t 
block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(8, 128, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index 01e26b5f29..bdf16d2f03 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { 
-#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(16, 256, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); 
#undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index b0534b555f..93624df4aa 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t 
small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 512, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index eadc88085f..90f271e3ee 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -15,6 +15,8 @@ */ #pragma once +#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Search with filter instantiation + #include "../test_utils.cuh" #include "ann_utils.cuh" #include @@ -25,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -41,8 +44,22 @@ #include #include -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { namespace { + +/* A filter that excludes all indices below `offset`. 
*/ +struct test_cagra_sample_filter { + static constexpr unsigned offset = 400; + inline _RAFT_HOST_DEVICE auto operator()( + // query index + const uint32_t query_ix, + // the index of the current sample in the dataset + const uint32_t sample_ix) const + { + return sample_ix >= offset; + } +}; + // For sort_knn_graph test template void RandomSuffle(raft::host_matrix_view index) @@ -365,6 +382,162 @@ class AnnCagraSortTest : public ::testing::TestWithParam { rmm::device_uvector database; }; +template +class AnnCagraFilterTest : public ::testing::TestWithParam { + public: + AnnCagraFilterTest() + : stream_(resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam::GetParam()), + database(0, stream_), + search_queries(0, stream_) + { + } + + protected: + void testCagraFilter() + { + size_t queries_size = ps.n_queries * ps.k; + std::vector indices_Cagra(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_Cagra(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + auto* database_filtered_ptr = database.data() + test_cagra_sample_filter::offset * ps.dim; + naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database_filtered_ptr, + ps.n_queries, + ps.n_rows - test_cagra_sample_filter::offset, + ps.dim, + ps.k, + ps.metric); + raft::linalg::addScalar(indices_naive_dev.data(), + indices_naive_dev.data(), + IdxT(test_cagra_sample_filter::offset), + queries_size, + stream_); + update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + { + rmm::device_uvector distances_dev(queries_size, stream_); + rmm::device_uvector indices_dev(queries_size, stream_); + + { +
cagra::index_params index_params; + index_params.metric = ps.metric; // Note: currently only the cagra::index_params metric is + // not used for knn_graph building. + cagra::search_params search_params; + search_params.algo = ps.algo; + search_params.max_queries = ps.max_queries; + search_params.team_size = ps.team_size; + search_params.hashmap_mode = cagra::hash_mode::HASH; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.n_rows, ps.dim); + + cagra::index index(handle_); + if (ps.host_dataset) { + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + index = cagra::build(handle_, index_params, database_host_view); + } else { + index = cagra::build(handle_, index_params, database_view); + } + + if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.n_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, ps.k); + + cagra::search_with_filtering(handle_, + search_params, + index, + search_queries_view, + indices_out_view, + dists_out_view, + test_cagra_sample_filter()); + update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_); + update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + // Test filter + bool unacceptable_node = false; + for (int q = 0; q < ps.n_queries; q++) { + for (int i = 0; i < ps.k; i++) { + const auto n = indices_Cagra[q * ps.k + i]; + unacceptable_node = unacceptable_node | !test_cagra_sample_filter()(q, n); + } + } +
EXPECT_FALSE(unacceptable_node); + + double min_recall = ps.min_recall; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_Cagra, + distances_naive, + distances_Cagra, + ps.n_queries, + ps.k, + 0.001, + min_recall)); + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); + } + } + + void SetUp() override + { + database.resize(((size_t)ps.n_rows) * ps.dim, stream_); + search_queries.resize(ps.n_queries * ps.dim, stream_); + raft::random::Rng r(1234ULL); + if constexpr (std::is_same{}) { + r.normal(database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0), stream_); + r.normal(search_queries.data(), ps.n_queries * ps.dim, DataT(0.1), DataT(2.0), stream_); + } else { + r.uniformInt(database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20), stream_); + r.uniformInt(search_queries.data(), ps.n_queries * ps.dim, DataT(1), DataT(20), stream_); + } + resource::sync_stream(handle_); + } + + void TearDown() override + { + resource::sync_stream(handle_); + database.resize(0, stream_); + search_queries.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnCagraInputs ps; + rmm::device_uvector database; + rmm::device_uvector search_queries; +}; + inline std::vector generate_inputs() { // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries @@ -467,4 +640,4 @@ inline std::vector generate_inputs() const std::vector inputs = generate_inputs(); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh index f61e476652..175e4ef483 100644 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh @@ -1,93 +1,107 @@ -/* - * 
Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include // RAFT_EXPLICIT - -namespace raft::neighbors::cagra::detail { - -namespace multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - extern template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - cudaStream_t stream); - -instantiate_kernel_selection(32, 1024, float, uint64_t, float); -instantiate_kernel_selection(8, 128, float, uint64_t, float); -instantiate_kernel_selection(16, 256, float, uint64_t, float); -instantiate_kernel_selection(32, 512, float, uint64_t, float); - -#undef instantiate_kernel_selection -} // namespace multi_cta_search - -namespace single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, 
MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - extern template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, 1024, float, uint64_t, float); -instantiate_single_cta_select_and_run(8, 128, float, uint64_t, float); -instantiate_single_cta_select_and_run(16, 256, float, uint64_t, float); -instantiate_single_cta_select_and_run(32, 512, float, uint64_t, float); - -} // namespace single_cta_search -} // namespace raft::neighbors::cagra::detail +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include // none_cagra_sample_filter +#include // RAFT_EXPLICIT + +namespace raft::neighbors::cagra::detail { + +namespace multi_cta_search { +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection +} // namespace multi_cta_search + +namespace single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* 
const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace single_cta_search +} // namespace raft::neighbors::cagra::detail \ No newline at end of file diff --git a/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu index fa3d76d066..6f9e8dbd43 100644 --- a/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu @@ -19,11 +19,11 @@ #include "../ann_cagra.cuh" #include "search_kernel_uint64_t.cuh" -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { typedef AnnCagraTest AnnCagraTestF_I64; TEST_P(AnnCagraTestF_I64, AnnCagra) { this->testCagra(); } INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_I64, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra diff --git 
a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu index dbaf4dedd9..01d7e1e1ea 100644 --- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu @@ -18,7 +18,7 @@ #include "../ann_cagra.cuh" -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { typedef AnnCagraTest AnnCagraTestF_U32; TEST_P(AnnCagraTestF_U32, AnnCagra) { this->testCagra(); } @@ -26,7 +26,11 @@ TEST_P(AnnCagraTestF_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraSortTest AnnCagraSortTestF_U32; TEST_P(AnnCagraSortTestF_U32, AnnCagraSort) { this->testCagraSort(); } +typedef AnnCagraFilterTest AnnCagraFilterTestF_U32; +TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter) { this->testCagraFilter(); } + INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestF_U32, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu index ba60131677..ee06d369fa 100644 --- a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu @@ -18,14 +18,17 @@ #include "../ann_cagra.cuh" -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { typedef AnnCagraTest AnnCagraTestI8_U32; TEST_P(AnnCagraTestI8_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraSortTest AnnCagraSortTestI8_U32; TEST_P(AnnCagraSortTestI8_U32, AnnCagraSort) { this->testCagraSort(); } +typedef AnnCagraFilterTest AnnCagraFilterTestI8_U32; +TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter) { this->testCagraFilter(); } INSTANTIATE_TEST_CASE_P(AnnCagraTest, 
AnnCagraTestI8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestI8_U32, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu index cc172e4833..3243e73ccd 100644 --- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu @@ -18,7 +18,7 @@ #include "../ann_cagra.cuh" -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { typedef AnnCagraTest AnnCagraTestU8_U32; TEST_P(AnnCagraTestU8_U32, AnnCagra) { this->testCagra(); } @@ -26,7 +26,11 @@ TEST_P(AnnCagraTestU8_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraSortTest AnnCagraSortTestU8_U32; TEST_P(AnnCagraSortTestU8_U32, AnnCagraSort) { this->testCagraSort(); } +typedef AnnCagraFilterTest AnnCagraFilterTestU8_U32; +TEST_P(AnnCagraFilterTestU8_U32, AnnCagraFilter) { this->testCagraFilter(); } + INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestU8_U32, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra