Merge branch 'branch-23.12' into imp_one_block_radix

rapidsai · Nov 7, 2023 · 6e5cda1 · 6e5cda1
2 parents f246748 + 62c1687
commit 6e5cda1
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 12 deletions.
diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md
@@ -152,6 +152,11 @@ options:
   -f, --force           re-run algorithms even if their results already exist (default: False)
   -m SEARCH_MODE, --search-mode SEARCH_MODE
                         run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput)
+  -t SEARCH_THREADS, --search-threads SEARCH_THREADS
+                        specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is
+                        specified, then a single test is run with 'min' threads. By default min=1, max=<num hyper threads>. (default: None)
+  -r, --dry-run         dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but
+                        will not actually execute the command. (default: False)
 ```
 
 `dataset`: name of the dataset to be searched in [datasets.yaml](#yaml-dataset-config)

diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
@@ -18,6 +18,7 @@
 import json
 import os
 import subprocess
+import uuid
 from importlib import import_module
 
 import yaml
@@ -80,23 +81,24 @@ def run_build_and_search(
     force,
     build,
     search,
+    dry_run,
     k,
     batch_size,
     search_threads,
     mode="throughput",
 ):
     for executable, ann_executable_path, algo in executables_to_run.keys():
         # Need to write temporary configuration
-        temp_conf_filename = f"temporary_{conf_filename}"
-        temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename)
-        with open(temp_conf_filepath, "w") as f:
+        temp_conf_filename = f"{conf_filename}_{algo}_{uuid.uuid1()}.json"
+        with open(temp_conf_filename, "w") as f:
             temp_conf = dict()
             temp_conf["dataset"] = conf_file["dataset"]
             temp_conf["search_basic_param"] = conf_file["search_basic_param"]
             temp_conf["index"] = executables_to_run[
                 (executable, ann_executable_path, algo)
             ]["index"]
-            json.dump(temp_conf, f)
+            json_str = json.dumps(temp_conf, indent=2)
+            f.write(json_str)
 
         legacy_result_folder = os.path.join(
             dataset_path, conf_file["dataset"]["name"], "result"
@@ -116,8 +118,19 @@ def run_build_and_search(
             ]
             if force:
                 cmd = cmd + ["--overwrite"]
-            cmd = cmd + [temp_conf_filepath]
-            subprocess.run(cmd, check=True)
+            cmd = cmd + [temp_conf_filename]
+
+            if dry_run:
+                print(
+                    "Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd))
+                )
+            else:
+                try:
+                    subprocess.run(cmd, check=True)
+                except Exception as e:
+                    print("Error occurred running benchmark: %s" % e)
+                finally:
+                    os.remove(temp_conf_filename)
 
         if search:
             search_folder = os.path.join(legacy_result_folder, "search")
@@ -141,10 +154,18 @@ def run_build_and_search(
             if search_threads:
                 cmd = cmd + ["--threads=%s" % search_threads]
 
-            cmd = cmd + [temp_conf_filepath]
-            subprocess.run(cmd, check=True)
-
-        os.remove(temp_conf_filepath)
+            cmd = cmd + [temp_conf_filename]
+            if dry_run:
+                print(
+                    "Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd))
+                )
+            else:
+                try:
+                    subprocess.run(cmd, check=True)
+                except Exception as e:
+                    print("Error occurred running benchmark: %s" % e)
+                finally:
+                    os.remove(temp_conf_filename)
 
 
 def main():
@@ -253,13 +274,24 @@ def main():
         "--search-threads",
         help="specify the number threads to use for throughput benchmark."
         " Single value or a pair of min and max separated by ':'. "
-        "Example --threads=1:4. Power of 2 values between 'min' "
+        "Example: --search-threads=1:4. Power of 2 values between 'min' "
         "and 'max' will be used. If only 'min' is specified, then a "
         "single test is run with 'min' threads. By default min=1, "
         "max=<num hyper threads>.",
         default=None,
     )
 
+    parser.add_argument(
+        "-r",
+        "--dry-run",
+        help="dry-run mode will convert the yaml config for the specified "
+        "algorithms and datasets to the json format that's consumed "
+        "by the lower-level c++ binaries and then print the command "
+        "to run execute the benchmarks but will not actually execute "
+        "the command.",
+        action="store_true",
+    )
+
     args = parser.parse_args()
 
     # If both build and search are not provided,
@@ -271,6 +303,8 @@ def main():
         build = args.build
         search = args.search
 
+    dry_run = args.dry_run
+
     mode = args.search_mode
     k = args.count
     batch_size = args.batch_size
@@ -452,13 +486,14 @@ def add_algo_group(group_list):
 
     run_build_and_search(
         conf_file,
-        f"{args.dataset}.json",
+        f"{args.dataset}",
         conf_filedir,
         executables_to_run,
         args.dataset_path,
         args.force,
         build,
         search,
+        dry_run,
         k,
         batch_size,
         args.search_threads,