From 652f702b8a6b3e7eed0453e35546bef81ccb2ef3 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 19 Oct 2023 22:22:09 -0400 Subject: [PATCH 1/5] Adding simple doc for dataset and the corresponding config. Need to get more clarify on how the dataset was generated. --- docs/source/wiki_all_dataset.md | 30 +++ .../src/raft-ann-bench/run/conf/wiki_all.json | 200 ++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 docs/source/wiki_all_dataset.md create mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json diff --git a/docs/source/wiki_all_dataset.md b/docs/source/wiki_all_dataset.md new file mode 100644 index 0000000000..b3a336db1d --- /dev/null +++ b/docs/source/wiki_all_dataset.md @@ -0,0 +1,30 @@ +# Wiki-all Dataset + +The `wiki-all` dataset was created to stress vector search algorithms at scale with both a large number of vectors and dimensions. The entire dataset contains 88M vectors with 768 dimensions and is meant for testing the types of vectors one would typically encounter in retrieval augmented generation (RAG) workloads. The full dataset is ~251GB in size, which is intentionally larger than the typical memory of GPUs. The massive scale is intended to promote the use of compression and efficient out-of-core methods for both indexing and search. + +## Getting the dataset + +The dataset is composed of all the available languages of in the [Cohere Wikipedia dataset](https://huggingface.co/datasets/Cohere/wikipedia-22-12). An [English version]( https://www.kaggle.com/datasets/jjinho/wikipedia-20230701) is also available. + +TODO: Brief summary of how the dataset was created + +A version of the dataset is available in the format that can be used directly by the [raft-ann-bench]() tool. It's ~251GB, and has been split into multiple parts. 
+ +The following will download all 10 the parts and untar them to a `wiki_all` directory: +```bash +curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.{00..9} | tar -xf - -C /datasets/wiki_all/ +``` + +The above has the unfortunate drawback that if the command should fail for any reason, it cannot be restarted. The files can also be downloaded individually and then untarred to the directory. Each file is ~27GB and there are 10 of them. + +```bash +curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.00 +... +curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.09 + +cat wiki_all.tar.* | tar -xf - -C /datasets/wiki_all/ +```zsx + +## Using the dataset + +After the dataset is downloaded and extracted to the `wiki_all` directory, the files can be used in the benchmarking tool. The dataset name is `wiki_all`, and the benchmarking tool can be used by specifying `--dataset wiki_all` in the scripts. \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json new file mode 100644 index 0000000000..00f232310e --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json @@ -0,0 +1,200 @@ +{ + "dataset": { + "name": "wiki_all", + "base_file": "wiki_all/base.88M.fbin", + "query_file": "wiki_all/queries.fbin", + "groundtruth_neighbors_file": "wiki_all/groundtruth.88M.neighbors.ibin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 10000, + "k": 10 + }, + "index": [ + { + "name": "hnswlib.M16.ef50", + "algo": "hnswlib", + "build_param": { "M": 16, "efConstruction": 50, "numThreads": 56 }, + "file": "wiki_all/hnswlib/M16.ef50", + "search_params": [ + { "ef": 10, "numThreads": 56 }, + { "ef": 20, "numThreads": 56 }, + { "ef": 40, "numThreads": 56 }, + { "ef": 60, "numThreads": 56 }, + { "ef": 80, "numThreads": 56 }, + { "ef": 120, "numThreads": 56 }, + { "ef": 200, "numThreads": 56 
}, + { "ef": 400, "numThreads": 56 }, + { "ef": 600, "numThreads": 56 }, + { "ef": 800, "numThreads": 56 } + ] + }, + { + "name": "faiss_ivf_pq.M32-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "M": 32, + "nlist": 16384, + "ratio": 2 + }, + "file": "wiki_all/faiss_ivf_pq/M32-nlist16K_ratio2", + "search_params": [ + { "nprobe": 10 }, + { "nprobe": 20 }, + { "nprobe": 30 }, + { "nprobe": 40 }, + { "nprobe": 50 }, + { "nprobe": 100 }, + { "nprobe": 200 }, + { "nprobe": 500 } + ] + }, + { + "name": "faiss_ivf_pq.M64-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "M": 64, + "nlist": 16384, + "ratio": 2 + }, + "file": "wiki_all/faiss_ivf_pq/M64-nlist16K_ratio2", + "search_params": [ + { "nprobe": 10 }, + { "nprobe": 20 }, + { "nprobe": 30 }, + { "nprobe": 40 }, + { "nprobe": 50 }, + { "nprobe": 100 }, + { "nprobe": 200 }, + { "nprobe": 500 } + ] + }, + { + "name": "raft_ivf_pq.d128-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 128, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all/raft_ivf_pq/d128-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 } + ] + }, + { + "name": "raft_ivf_pq.d64-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 64, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": 
"wiki_all/raft_ivf_pq/d64-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + ] + }, + { + "name": "raft_ivf_pq.d32-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all/raft_ivf_pq/d32-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 } + ] + }, + { + "name": "raft_ivf_pq.d32X-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all/raft_ivf_pq/d32-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 
16 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + + ] + }, + { + "name": "raft_cagra.dim32.multi_cta", + "algo": "raft_cagra", + "build_param": { "graph_degree": 32, "intermediate_graph_degree": 48 }, + "file": "wiki_all/raft_cagra/dim32.ibin", + "search_params": [ + { "itopk": 32, "search_width": 
1, "max_iterations": 0, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta" }, + { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta" }, + { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta" }, + { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta" }, + { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta" } + ] + } + + ] +} + From 306d94d6e999430bd3ec63e8392e13037ba03ed4 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Fri, 20 Oct 2023 21:50:38 -0400 Subject: [PATCH 2/5] Adding Jiwei's summary to wiki-all --- docs/source/wiki_all_dataset.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/wiki_all_dataset.md b/docs/source/wiki_all_dataset.md index b3a336db1d..921b6c822f 100644 --- a/docs/source/wiki_all_dataset.md +++ b/docs/source/wiki_all_dataset.md @@ -6,7 +6,8 @@ The `wiki-all` dataset was created to stress vector search algorithms at scale w The dataset is composed of all the available languages of in the [Cohere Wikipedia dataset](https://huggingface.co/datasets/Cohere/wikipedia-22-12). An [English version]( https://www.kaggle.com/datasets/jjinho/wikipedia-20230701) is also available. -TODO: Brief summary of how the dataset was created + +We download the English wiki texts from https://www.kaggle.com/datasets/jjinho/wikipedia-20230701 and multi-lingual wiki texts from Cohere https://huggingface.co/datasets/Cohere/wikipedia-22-12. We notice that the English texts from Cohere is an older and smaller version 2022-12 than the kaggle English wiki texts 2023-07 so we remove the English texts from Cohere completely. In other words, the final wiki texts include English wiki from kaggle and other languages from Cohere. It should be noted that English texts constitute 50% of the total text size. Then, the wiki texts are chunked into 85 million 128-token pieces. For reference, Cohere chunks wiki texts into 104-token pieces. Finally, we compute the embedding of each chunk using paraphrase-multilingual-mpnet-base-v2 embedding model. In the end, we get an embedding matrix of size 85 million by 768. A version of the dataset is available in the format that can be used directly by the [raft-ann-bench]() tool. It's ~251GB, and has been split into multiple parts. From de7967c3dcbeb0f75dc128c49ef35d20592361fe Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 25 Oct 2023 15:21:50 -0400 Subject: [PATCH 3/5] Adding links for subsets. 
Adding new config files for all of them --- docs/source/wiki_all_dataset.md | 29 ++- .../raft-ann-bench/run/conf/wiki_all_10M.json | 200 ++++++++++++++++++ .../conf/{wiki_all.json => wiki_all_1M.json} | 24 +-- .../raft-ann-bench/run/conf/wiki_all_88M.json | 200 ++++++++++++++++++ 4 files changed, 434 insertions(+), 19 deletions(-) create mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_10M.json rename python/raft-ann-bench/src/raft-ann-bench/run/conf/{wiki_all.json => wiki_all_1M.json} (93%) create mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_88M.json diff --git a/docs/source/wiki_all_dataset.md b/docs/source/wiki_all_dataset.md index 921b6c822f..a1134ab9f2 100644 --- a/docs/source/wiki_all_dataset.md +++ b/docs/source/wiki_all_dataset.md @@ -7,13 +7,19 @@ The `wiki-all` dataset was created to stress vector search algorithms at scale w The dataset is composed of all the available languages of in the [Cohere Wikipedia dataset](https://huggingface.co/datasets/Cohere/wikipedia-22-12). An [English version]( https://www.kaggle.com/datasets/jjinho/wikipedia-20230701) is also available. -We download the English wiki texts from https://www.kaggle.com/datasets/jjinho/wikipedia-20230701 and multi-lingual wiki texts from Cohere https://huggingface.co/datasets/Cohere/wikipedia-22-12. We notice that the English texts from Cohere is an older and smaller version 2022-12 than the kaggle English wiki texts 2023-07 so we remove the English texts from Cohere completely. In other words, the final wiki texts include English wiki from kaggle and other languages from Cohere. It should be noted that English texts constitute 50% of the total text size. Then, the wiki texts are chunked into 85 million 128-token pieces. For reference, Cohere chunks wiki texts into 104-token pieces. Finally, we compute the embedding of each chunk using paraphrase-multilingual-mpnet-base-v2 embedding model. 
In the end, we get an embedding matrix of size 85 million by 768. +The dataset is composed of English wiki texts from [Kaggle](https://www.kaggle.com/datasets/jjinho/wikipedia-20230701) and multi-lingual wiki texts from [Cohere Wikipedia](https://huggingface.co/datasets/Cohere/wikipedia-22-12). -A version of the dataset is available in the format that can be used directly by the [raft-ann-bench]() tool. It's ~251GB, and has been split into multiple parts. +Cohere's English Texts are older (2022) and smaller than the Kaggle English Wiki texts (2023) so the English texts have been removed from Cohere completely. The final Wiki texts include English Wiki from Kaggle and the other languages from Cohere. The English texts constitute 50% of the total text size. -The following will download all 10 the parts and untar them to a `wiki_all` directory: +To form the final dataset, the Wiki texts were chunked into 85 million 128-token pieces. For reference, Cohere chunks Wiki texts into 104-token pieces. Finally, the embeddings of each chunk were computed using the [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) embedding model. The resulting dataset is an embedding matrix of size 88 million by 768. Also included with the dataset is a query file containing 10k query vectors and a groundtruth file to evaluate nearest neighbors algorithms. + +### Full dataset + +A version of the dataset is made available in the binary format that can be used directly by the [raft-ann-bench](https://docs.rapids.ai/api/raft/nightly/raft_ann_benchmarks/) tool. The full 88M dataset is ~251GB and the download link below contains tarballs that have been split into multiple parts. 
+ +The following will download all 10 the parts and untar them to a `wiki_all_88M` directory: ```bash -curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.{00..9} | tar -xf - -C /datasets/wiki_all/ +curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.{00..9} | tar -xf - -C /datasets/wiki_all_88M/ ``` The above has the unfortunate drawback that if the command should fail for any reason, it cannot be restarted. The files can also be downloaded individually and then untarred to the directory. Each file is ~27GB and there are 10 of them. @@ -23,9 +29,18 @@ curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.00 ... curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.09 -cat wiki_all.tar.* | tar -xf - -C /datasets/wiki_all/ -```zsx +cat wiki_all.tar.* | tar -xf - -C /datasets/wiki_all_88M/ +``` + +### 1M and 10M subsets + +Also available are 1M and 10M subsets of the full dataset which are 2.9GB and 29GB, respectively. These subsets also include query sets of 10k vectors and corresponding groundtruth files. + +```bash +curl -s https://data.rapids.ai/raft/datasets/wiki_all_1M/wiki_all_1M.tar +curl -s https://data.rapids.ai/raft/datasets/wiki_all_10M/wiki_all_10M.tar +``` ## Using the dataset -After the dataset is downloaded and extracted to the `wiki_all` directory, the files can be used in the benchmarking tool. The dataset name is `wiki_all`, and the benchmarking tool can be used by specifying `--dataset wiki_all` in the scripts. \ No newline at end of file +After the dataset is downloaded and extracted to the `wiki_all_88M` directory (or `wiki_all_1M`/`wiki_all_10M` depending on whether the subsets are used), the files can be used in the benchmarking tool. The dataset name is `wiki_all` (or `wiki_all_1M`/`wiki_all_10M`), and the benchmarking tool can be used by specifying the appropriate name `--dataset wiki_all_88M` in the scripts. 
diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_10M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_10M.json new file mode 100644 index 0000000000..e5f77e7858 --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_10M.json @@ -0,0 +1,200 @@ +{ + "dataset": { + "name": "wiki_all_10M", + "base_file": "wiki_all_10M/base.88M.fbin", + "query_file": "wiki_all_10M/queries.fbin", + "groundtruth_neighbors_file": "wiki_all_10M/groundtruth.88M.neighbors.ibin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 10000, + "k": 10 + }, + "index": [ + { + "name": "hnswlib.M16.ef50", + "algo": "hnswlib", + "build_param": { "M": 16, "efConstruction": 50, "numThreads": 56 }, + "file": "wiki_all_10M/hnswlib/M16.ef50", + "search_params": [ + { "ef": 10, "numThreads": 56 }, + { "ef": 20, "numThreads": 56 }, + { "ef": 40, "numThreads": 56 }, + { "ef": 60, "numThreads": 56 }, + { "ef": 80, "numThreads": 56 }, + { "ef": 120, "numThreads": 56 }, + { "ef": 200, "numThreads": 56 }, + { "ef": 400, "numThreads": 56 }, + { "ef": 600, "numThreads": 56 }, + { "ef": 800, "numThreads": 56 } + ] + }, + { + "name": "faiss_ivf_pq.M32-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "M": 32, + "nlist": 16384, + "ratio": 2 + }, + "file": "wiki_all_10M/faiss_ivf_pq/M32-nlist16K_ratio2", + "search_params": [ + { "nprobe": 10 }, + { "nprobe": 20 }, + { "nprobe": 30 }, + { "nprobe": 40 }, + { "nprobe": 50 }, + { "nprobe": 100 }, + { "nprobe": 200 }, + { "nprobe": 500 } + ] + }, + { + "name": "faiss_ivf_pq.M64-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "M": 64, + "nlist": 16384, + "ratio": 2 + }, + "file": "wiki_all_10M/faiss_ivf_pq/M64-nlist16K_ratio2", + "search_params": [ + { "nprobe": 10 }, + { "nprobe": 20 }, + { "nprobe": 30 }, + { "nprobe": 40 }, + { "nprobe": 50 }, + { "nprobe": 100 }, + { "nprobe": 200 }, + { "nprobe": 500 } + ] + }, + { + "name": "raft_ivf_pq.d128-nlist16K", + 
"algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 128, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all_10M/raft_ivf_pq/d128-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 } + ] + }, + { + "name": "raft_ivf_pq.d64-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 64, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all_10M/raft_ivf_pq/d64-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + ] + }, + { + "name": "raft_ivf_pq.d32-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": 
"wiki_all_10M/raft_ivf_pq/d32-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 } + ] + }, + { + "name": "raft_ivf_pq.d32X-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all_10M/raft_ivf_pq/d32-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 100, 
"internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + + ] + }, + { + "name": "raft_cagra.dim32.multi_cta", + "algo": "raft_cagra", + "build_param": { "graph_degree": 32, "intermediate_graph_degree": 48 }, + "file": "wiki_all_10M/raft_cagra/dim32.ibin", + "search_params": [ + { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 64, "search_width": 1, 
"max_iterations": 64, "algo": "multi_cta" }, + { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta" }, + { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta" }, + { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta" }, + { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta" } + ] + } + + ] +} + diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json similarity index 93% rename from python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json index 00f232310e..6eb72a65a1 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json @@ -1,9 +1,9 @@ { "dataset": { - "name": "wiki_all", - "base_file": "wiki_all/base.88M.fbin", - "query_file": "wiki_all/queries.fbin", - "groundtruth_neighbors_file": "wiki_all/groundtruth.88M.neighbors.ibin", + "name": "wiki_all_1M", + "base_file": "wiki_all_1M/base.88M.fbin", + "query_file": "wiki_all_1M/queries.fbin", + "groundtruth_neighbors_file": "wiki_all_1M/groundtruth.88M.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { @@ -15,7 +15,7 @@ "name": "hnswlib.M16.ef50", "algo": "hnswlib", "build_param": { "M": 16, "efConstruction": 50, "numThreads": 56 }, - "file": "wiki_all/hnswlib/M16.ef50", + "file": "wiki_all_1M/hnswlib/M16.ef50", "search_params": [ { "ef": 10, "numThreads": 56 }, { "ef": 20, "numThreads": 56 }, @@ -37,7 +37,7 @@ "nlist": 16384, 
"ratio": 2 }, - "file": "wiki_all/faiss_ivf_pq/M32-nlist16K_ratio2", + "file": "wiki_all_1M/faiss_ivf_pq/M32-nlist16K_ratio2", "search_params": [ { "nprobe": 10 }, { "nprobe": 20 }, @@ -57,7 +57,7 @@ "nlist": 16384, "ratio": 2 }, - "file": "wiki_all/faiss_ivf_pq/M64-nlist16K_ratio2", + "file": "wiki_all_1M/faiss_ivf_pq/M64-nlist16K_ratio2", "search_params": [ { "nprobe": 10 }, { "nprobe": 20 }, @@ -79,7 +79,7 @@ "niter": 10, "ratio": 10 }, - "file": "wiki_all/raft_ivf_pq/d128-nlist16K", + "file": "wiki_all_1M/raft_ivf_pq/d128-nlist16K", "search_params": [ { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, @@ -100,7 +100,7 @@ "niter": 10, "ratio": 10 }, - "file": "wiki_all/raft_ivf_pq/d64-nlist16K", + "file": "wiki_all_1M/raft_ivf_pq/d64-nlist16K", "search_params": [ { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, @@ -121,7 +121,7 @@ "niter": 10, "ratio": 10 }, - "file": "wiki_all/raft_ivf_pq/d32-nlist16K", + "file": "wiki_all_1M/raft_ivf_pq/d32-nlist16K", "search_params": [ { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, @@ -142,7 +142,7 @@ "niter": 10, "ratio": 10 }, - "file": "wiki_all/raft_ivf_pq/d32-nlist16K", + "file": "wiki_all_1M/raft_ivf_pq/d32-nlist16K", "search_params": [ { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, @@ -170,7 +170,7 @@ "name": "raft_cagra.dim32.multi_cta", "algo": "raft_cagra", "build_param": { "graph_degree": 32, "intermediate_graph_degree": 48 }, - "file": 
"wiki_all/raft_cagra/dim32.ibin", + "file": "wiki_all_1M/raft_cagra/dim32.ibin", "search_params": [ { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_88M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_88M.json new file mode 100644 index 0000000000..e50b40f554 --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_88M.json @@ -0,0 +1,200 @@ +{ + "dataset": { + "name": "wiki_all_88M", + "base_file": "wiki_all_88M/base.88M.fbin", + "query_file": "wiki_all_88M/queries.fbin", + "groundtruth_neighbors_file": "wiki_all_88M/groundtruth.88M.neighbors.ibin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 10000, + "k": 10 + }, + "index": [ + { + "name": "hnswlib.M16.ef50", + "algo": "hnswlib", + "build_param": { "M": 16, "efConstruction": 50, "numThreads": 56 }, + "file": "wiki_all_88M/hnswlib/M16.ef50", + "search_params": [ + { "ef": 10, "numThreads": 56 }, + { "ef": 20, "numThreads": 56 }, + { "ef": 40, "numThreads": 56 }, + { "ef": 60, "numThreads": 56 }, + { "ef": 80, "numThreads": 56 }, + { "ef": 120, "numThreads": 56 }, + { "ef": 200, "numThreads": 56 }, + { "ef": 400, "numThreads": 56 }, + { "ef": 600, "numThreads": 56 }, + { "ef": 800, "numThreads": 56 } + ] + }, + { + "name": "faiss_ivf_pq.M32-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "M": 32, + "nlist": 16384, + "ratio": 2 + }, + "file": "wiki_all_88M/faiss_ivf_pq/M32-nlist16K_ratio2", + "search_params": [ + { "nprobe": 10 }, + { "nprobe": 20 }, + { "nprobe": 30 }, + { "nprobe": 40 }, + { "nprobe": 50 }, + { "nprobe": 100 }, + { "nprobe": 200 }, + { "nprobe": 500 } + ] + }, + { + "name": "faiss_ivf_pq.M64-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "M": 64, + "nlist": 16384, + "ratio": 2 + }, + "file": 
"wiki_all_88M/faiss_ivf_pq/M64-nlist16K_ratio2", + "search_params": [ + { "nprobe": 10 }, + { "nprobe": 20 }, + { "nprobe": 30 }, + { "nprobe": 40 }, + { "nprobe": 50 }, + { "nprobe": 100 }, + { "nprobe": 200 }, + { "nprobe": 500 } + ] + }, + { + "name": "raft_ivf_pq.d128-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 128, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all_88M/raft_ivf_pq/d128-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 } + ] + }, + { + "name": "raft_ivf_pq.d64-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 64, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all_88M/raft_ivf_pq/d64-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { 
"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + ] + }, + { + "name": "raft_ivf_pq.d32-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all_88M/raft_ivf_pq/d32-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 } + ] + }, + { + "name": "raft_ivf_pq.d32X-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all_88M/raft_ivf_pq/d32-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 30, 
"internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + + ] + }, + { + "name": "raft_cagra.dim32.multi_cta", + "algo": "raft_cagra", + "build_param": { "graph_degree": 32, "intermediate_graph_degree": 48 }, + "file": "wiki_all_88M/raft_cagra/dim32.ibin", + "search_params": [ + { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": 
"multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta" }, + { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta" }, + { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta" }, + { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta" }, + { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta" } + ] + } + + ] +} + From 8506b5c1b561b49edd8e7be3be1ffce59e277c75 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 25 Oct 2023 15:29:16 -0400 Subject: [PATCH 4/5] Adding to main raft-ann-bench docs --- docs/source/raft_ann_benchmarks.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 25fdf3f0f6..ef378facb5 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -18,6 +18,8 @@ This project provides a benchmark program for various ANN search implementations - [Running with Docker containers](#running-with-docker-containers) - [Creating and customizing dataset configurations](#creating-and-customizing-dataset-configurations) - [Adding a new ANN algorithm](#adding-a-new-ann-algorithm) +- [Parameter tuning guide](https://docs.rapids.ai/api/raft/nightly/ann_benchmarks_param_tuning/) +- [Wiki-all RAG/LLM Dataset](https://docs.rapids.ai/api/raft/nightly/wiki_all_dataset/) ## Installing the benchmarks @@ -242,15 +244,18 @@ Configuration files already exist for the following list of the million-scale da | `nytimes-256-angular` | 290K | 256 | 10K | Angular | | `sift-128-euclidean` | 1M | 128 | 10K | Euclidean| -All of the datasets above contain ground test datasets with 100 neighbors. Thus `k` for these datasets must be less than or equal to 100. +All of the datasets above contain ground truth datasets with 100 neighbors. Thus `k` for these datasets must be less than or equal to 100. ### End to end: large-scale benchmarks (>10M vectors) + `raft-ann-bench.get_dataset` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale) due to their size. You should instead use our billion-scale datasets guide to download and prepare them. All other python commands mentioned below work as intended once the billion-scale dataset has been downloaded. 
To download billion-scale datasets, visit [big-ann-benchmarks](http://big-ann-benchmarks.com/neurips21.html) +We also provide a new dataset called `wiki-all` containing 88 million 768-dimensional vectors. This dataset is meant for benchmarking a realistic RAG/LLM embedding size at scale. It also contains 1M and 10M vector subsets for smaller-scale experiments. See our [Wiki-all Dataset Guide](https://docs.rapids.ai/api/raft/nightly/wiki_all_dataset/) for more information and to download the dataset. + The steps below demonstrate how to download, install, and run benchmarks on a subset of 100M vectors from the Yandex Deep-1B dataset. Please note that datasets of this scale are recommended for GPUs with larger amounts of memory, such as the A100 or H100. ```bash From 61a3fdcada054db454ea70313cd323c8c44a432d Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 25 Oct 2023 15:55:48 -0400 Subject: [PATCH 5/5] Implementing feedback from review --- docs/source/raft_ann_benchmarks.md | 2 +- docs/source/wiki_all_dataset.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index ef378facb5..fadca595fb 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -254,7 +254,7 @@ All other python commands mentioned below work as intended once the billion-scale dataset has been downloaded. To download billion-scale datasets, visit [big-ann-benchmarks](http://big-ann-benchmarks.com/neurips21.html) -We also provide a new dataset called `wiki-all` containing 88 million 768-dimensional vectors. This dataset is meant for benchmarking a realistic RAG/LLM embedding size at scale. It also contains 1M and 10M vector subsets for smaller-scale experiments. See our [Wiki-all Dataset Guide](https://docs.rapids.ai/api/raft/nightly/wiki_all_dataset/) for more information and to download the dataset. 
+We also provide a new dataset called `wiki-all` containing 88 million 768-dimensional vectors. This dataset is meant for benchmarking a realistic retrieval-augmented generation (RAG)/LLM embedding size at scale. It also contains 1M and 10M vector subsets for smaller-scale experiments. See our [Wiki-all Dataset Guide](https://docs.rapids.ai/api/raft/nightly/wiki_all_dataset/) for more information and to download the dataset. The steps below demonstrate how to download, install, and run benchmarks on a subset of 100M vectors from the Yandex Deep-1B dataset. Please note that datasets of this scale are recommended for GPUs with larger amounts of memory, such as the A100 or H100. ```bash diff --git a/docs/source/wiki_all_dataset.md b/docs/source/wiki_all_dataset.md index a1134ab9f2..5c7f972b3d 100644 --- a/docs/source/wiki_all_dataset.md +++ b/docs/source/wiki_all_dataset.md @@ -22,7 +22,7 @@ The following will download all 10 the parts and untar them to a `wiki_all_88M` curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.{00..9} | tar -xf - -C /datasets/wiki_all_88M/ ``` -The above has the unfortunate drawback that if the command should fail for any reason, it cannot be restarted. The files can also be downloaded individually and then untarred to the directory. Each file is ~27GB and there are 10 of them. +The above has the unfortunate drawback that if the command should fail for any reason, all the parts need to be re-downloaded. The files can also be downloaded individually and then untarred to the directory. Each file is ~27GB and there are 10 of them. ```bash curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.00