diff --git a/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py index ad3a2490db..0c6e554e96 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py @@ -345,32 +345,74 @@ def load_lines(results_path, result_files, method, index_key): def load_all_results( - dataset_path, algorithms, k, batch_size, method, index_key + dataset_path, algorithms, groups, algo_groups, k, batch_size, method, + index_key ): results_path = os.path.join(dataset_path, "result", method) result_files = os.listdir(results_path) - print(result_files) + result_files = [result_file for result_file in result_files \ + if ".csv" in result_file] + # print(result_files) if method == "search": result_files = [ result_filename for result_filename in result_files if f"{k}-{batch_size}" in result_filename ] - if len(algorithms) > 0: - result_files = [ - result_filename - for result_filename in result_files - if result_filename.split("-")[0] in algorithms - ] - elif method == "build": - if len(algorithms) > 0: - result_files = [ - result_filename - for result_filename in result_files - if result_filename.split("-")[0] in algorithms - ] - - results = load_lines(results_path, result_files, method, index_key) + algo_group_files = [ + result_filename.split("-")[0] + for result_filename in result_files + ] + else: + algo_group_files = [ + result_filename + for result_filename in result_files + ] + for i in range(len(algo_group_files)): + algo_group = algo_group_files[i].replace(".csv", "").split("_") + if len(algo_group) == 2: + algo_group_files[i] = ("_".join(algo_group), "base") + else: + algo_group_files[i] = ("_".join(algo_group[:-1]), algo_group[-1]) + algo_group_files = list(zip(*algo_group_files)) + # final_groups = [result_files[i] for i in range(len(result_files)) if \ + # algo_group_files[i][1] in groups] + # if len(algorithms) > 0: + # final_algos = 
[final_groups[i] for i in range(len(result_files)) if \ + # ("_".join(result_files[i].split("_")[:-1]) in algorithms)] + # final_results = [] + if len(algorithms) > 0: + final_results = [result_files[i] for i in range(len(result_files)) if \ + (algo_group_files[0][i] in algorithms) and \ + (algo_group_files[1][i] in groups)] + else: + final_results = [result_files[i] for i in range(len(result_files)) if \ + (algo_group_files[1][i] in groups)] + + if len(algo_groups) > 0: + split_algo_groups = [algo_group.split(".") for algo_group in algo_groups] + split_algo_groups = list(zip(*split_algo_groups)) + final_algo_groups = [result_files[i] for i in range(len(result_files)) if \ + (algo_group_files[0][i] in split_algo_groups[0]) and \ + (algo_group_files[1][i] in split_algo_groups[1])] + final_results = final_results + final_algo_groups + final_results = set(final_results) + + # if len(algorithms) > 0: + # result_files = [ + # result_filename + # for result_filename in result_files + # if result_filename.split("-")[0] in algorithms + # ] + # elif method == "build": + # if len(algorithms) > 0: + # result_files = [ + # result_filename + # for result_filename in result_files + # if result_filename.split("-")[0] in algorithms + # ] + + results = load_lines(results_path, final_results, method, index_key) return results @@ -404,6 +446,15 @@ def main(): algorithms", default=None, ) + parser.add_argument( + "--groups", + help="plot only comma separated groups of parameters", + default="base" + ) + parser.add_argument( + "--algo-groups", + help="add comma separated algorithm+groups to the plot", + ) parser.add_argument( "-k", "--count", @@ -444,6 +495,11 @@ def main(): algorithms = args.algorithms.split(",") else: algorithms = [] + groups = args.groups.split(",") + if args.algo_groups: + algo_groups = args.algo_groups.split(",") + else: + algo_groups = [] k = args.count batch_size = args.batch_size if not args.build and not args.search: @@ -465,6 +521,8 @@ def main(): 
search_results = load_all_results( os.path.join(args.dataset_path, args.dataset), algorithms, + groups, + algo_groups, k, batch_size, "search", @@ -487,6 +545,8 @@ def main(): build_results = load_all_results( os.path.join(args.dataset_path, args.dataset), algorithms, + groups, + algo_groups, k, batch_size, "build", diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 7a362432d3..49d3ac9ec1 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -46,15 +46,20 @@ def validate_algorithm(algos_conf, algo, gpu_present): ) -def find_executable(algos_conf, algo, k, batch_size): +def find_executable(algos_conf, algo, group, k, batch_size): executable = algos_conf[algo]["executable"] + if group != "base": + return_str = f"{algo}_{group}-{k}-{batch_size}" + else: + return_str = f"{algo}-{k}-{batch_size}" + build_path = os.getenv("RAFT_HOME") if build_path is not None: build_path = os.path.join(build_path, "cpp", "build", executable) if os.path.exists(build_path): print(f"-- Using RAFT bench from repository in {build_path}. ") - return (executable, build_path, f"{algo}-{k}-{batch_size}") + return (executable, build_path, return_str) # if there is no build folder present, we look in the conda environment conda_path = os.getenv("CONDA_PREFIX") @@ -62,7 +67,7 @@ def find_executable(algos_conf, algo, k, batch_size): conda_path = os.path.join(conda_path, "bin", "ann", executable) if os.path.exists(conda_path): print("-- Using RAFT bench found in conda environment. 
") - return (executable, conda_path, f"{algo}-{k}-{batch_size}") + return (executable, conda_path, return_str) else: raise FileNotFoundError(executable) @@ -218,9 +223,13 @@ def main(): # ) parser.add_argument( "--groups", - help="comma separated groups of parameters to run the benchmarks for", + help="run only comma separated groups of parameters", default="base", ) + parser.add_argument( + "--algo-groups", + help="add comma separated algorithm+groups to run", + ) parser.add_argument( "-f", "--force", @@ -293,30 +302,50 @@ def main(): if filter_algos: allowed_algos = args.algorithms.split(",") named_groups = args.groups.split(",") + filter_algo_groups = True if args.algo_groups else False + allowed_algo_groups = None + if filter_algo_groups: + allowed_algo_groups = [ + algo_group.split(".") for algo_group in args.algo_groups.split(",") + ] + allowed_algo_groups = list(zip(*allowed_algo_groups)) algos_conf = dict() for algo_f in algos_conf_fs: with open(algo_f, "r") as f: if algo_f.split("/")[-1] == "raft_cagra.yaml": algo = yaml.safe_load(f) insert_algo = True + insert_algo_group = False if filter_algos: if algo["name"] not in allowed_algos: insert_algo = False - if insert_algo: + if filter_algo_groups: + if algo["name"] in allowed_algo_groups[0]: + insert_algo_group = True + + def add_algo_group(group_list): if algo["name"] not in algos_conf: algos_conf[algo["name"]] = dict() for group in algo.keys(): if group != "name": - if group in named_groups: + if group in group_list: algos_conf[algo["name"]][group] = algo[group] + if insert_algo: + add_algo_group(named_groups) + if insert_algo_group: + add_algo_group(allowed_algo_groups[1]) + + print(algos_conf) executables_to_run = dict() for algo in algos_conf.keys(): validate_algorithm(algos_yaml, algo, gpu_present) - executable = find_executable(algos_yaml, algo, k, batch_size) - if executable not in executables_to_run: - executables_to_run[executable] = {"index": []} for group in algos_conf[algo].keys(): + executable 
= find_executable( + algos_yaml, algo, group, k, batch_size + ) + if executable not in executables_to_run: + executables_to_run[executable] = {"index": []} build_params = algos_conf[algo][group]["build"] search_params = algos_conf[algo][group]["search"] @@ -336,7 +365,10 @@ def main(): for params in all_build_params: index = {"algo": algo, "build_param": {}} - index_name = f"{algo}" + if group != "base": + index_name = f"{algo}_{group}" + else: + index_name = f"{algo}" for i in range(len(params)): index["build_param"][param_names[i]] = params[i] index_name += "." + f"{param_names[i]}{params[i]}" @@ -422,7 +454,6 @@ def main(): # ) # executables_to_run[executable_path]["index"][pos] = index - print(conf_filedir) run_build_and_search( conf_file, f"{args.dataset}.json", diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yml deleted file mode 100644 index 136f9ffcab..0000000000 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yml +++ /dev/null @@ -1,4 +0,0 @@ -name: faiss_gpu_ivf_pq - - base: - build: - search: \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_flat.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_flat.yaml new file mode 100644 index 0000000000..2a41fb035d --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_flat.yaml @@ -0,0 +1,8 @@ +name: raft_ivf_flat +base: + build: + nlist: [1024, 16384] + ratio: [1, 2] + niter: [20, 25] + search: + nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_flat.yml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_flat.yml deleted file mode 100644 index 4af425896a..0000000000 --- 
a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_flat.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: raft_ivf_flat - - base: - build: - nlist: [1024, 16384] - ratio: [1, 2] - niter: [20, 25] - search: - nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_pq.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_pq.yaml new file mode 100644 index 0000000000..0acc7ae694 --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_pq.yaml @@ -0,0 +1,13 @@ +name: raft_ivf_pq +validator: raft-ann-bench.validators.raft_ivf_pq_validator +base: + build: + nlist: [1024] + pq_dim: [128, 64] + pq_bits: [8, 6] + ratio: [1] + niter: [25] + search: + nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] + internalDistanceDtype: ["float", "half"] + smemLutDtype: ["float", "fp8", "half"] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_pq.yml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_pq.yml deleted file mode 100644 index 3ef90bfb0d..0000000000 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_ivf_pq.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: raft_ivf_pq - - base: - build: - nlist: [1024] - pq_dim: [128, 64] - pq_bits: [8, 6] - ratio: [1] - niter: [25] - search: - nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] - internalDistanceDtype: ["float", "half"] - smemLutDtype: ["float", "fp8", "half"] - - # Custom validator with a `validate()` function that accepts a parameter combination - # and returns false if it's not valid (for example, - # internalDistanceDtype should never be lower than smemLutDtype) - validators: raft-ann-bench.validators.raft_ivf_pq_validator \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/validators/__init__.py b/python/raft-ann-bench/src/raft-ann-bench/validators/__init__.py new 
#
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Relative size of each dtype name that may appear in the IVF-PQ search
# parameters; only used to order dtypes against each other.
DTYPE_SIZES = {"float": 4, "half": 2, "fp8": 1}


def ivf_pq_validator(params):
    """Return whether an IVF-PQ parameter combination is usable.

    The only rule checked is that ``internalDistanceDtype`` must never be
    smaller than ``smemLutDtype``.

    Parameters
    ----------
    params : dict
        Mapping of parameter name to value for one parameter combination.

    Returns
    -------
    bool
        ``True`` if the combination is valid, ``False`` otherwise.
        Combinations that do not specify both dtypes are not constrained
        by this rule and are reported as valid.

    Raises
    ------
    KeyError
        If either dtype value is not a key of ``DTYPE_SIZES``.
    """
    # Without both dtypes there is nothing to compare; report the
    # combination as valid instead of implicitly returning None (falsy).
    if "internalDistanceDtype" not in params or "smemLutDtype" not in params:
        return True

    smem_lut_size = DTYPE_SIZES[params["smemLutDtype"]]
    internal_distance_size = DTYPE_SIZES[params["internalDistanceDtype"]]
    # Valid only when the internal distance dtype is at least as large as
    # the shared-memory LUT dtype.
    return smem_lut_size <= internal_distance_size