Skip to content

Commit

Permalink
Merge branch 'branch-23.12' into imp_one_block_radix
Browse files Browse the repository at this point in the history
  • Loading branch information
benfred authored Nov 7, 2023
2 parents f246748 + 62c1687 commit 6e5cda1
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 12 deletions.
5 changes: 5 additions & 0 deletions docs/source/raft_ann_benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ options:
-f, --force re-run algorithms even if their results already exist (default: False)
-m SEARCH_MODE, --search-mode SEARCH_MODE
run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput)
-t SEARCH_THREADS, --search-threads SEARCH_THREADS
specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is
specified, then a single test is run with 'min' threads. By default min=1, max=<num hyper threads>. (default: None)
-r, --dry-run dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but
will not actually execute the command. (default: False)
```
`dataset`: name of the dataset to be searched in [datasets.yaml](#yaml-dataset-config)
Expand Down
59 changes: 47 additions & 12 deletions python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import json
import os
import subprocess
import uuid
from importlib import import_module

import yaml
Expand Down Expand Up @@ -80,23 +81,24 @@ def run_build_and_search(
force,
build,
search,
dry_run,
k,
batch_size,
search_threads,
mode="throughput",
):
for executable, ann_executable_path, algo in executables_to_run.keys():
# Need to write temporary configuration
temp_conf_filename = f"temporary_{conf_filename}"
temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename)
with open(temp_conf_filepath, "w") as f:
temp_conf_filename = f"{conf_filename}_{algo}_{uuid.uuid1()}.json"
with open(temp_conf_filename, "w") as f:
temp_conf = dict()
temp_conf["dataset"] = conf_file["dataset"]
temp_conf["search_basic_param"] = conf_file["search_basic_param"]
temp_conf["index"] = executables_to_run[
(executable, ann_executable_path, algo)
]["index"]
json.dump(temp_conf, f)
json_str = json.dumps(temp_conf, indent=2)
f.write(json_str)

legacy_result_folder = os.path.join(
dataset_path, conf_file["dataset"]["name"], "result"
Expand All @@ -116,8 +118,19 @@ def run_build_and_search(
]
if force:
cmd = cmd + ["--overwrite"]
cmd = cmd + [temp_conf_filepath]
subprocess.run(cmd, check=True)
cmd = cmd + [temp_conf_filename]

if dry_run:
print(
"Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd))
)
else:
try:
subprocess.run(cmd, check=True)
except Exception as e:
print("Error occurred running benchmark: %s" % e)
finally:
os.remove(temp_conf_filename)

if search:
search_folder = os.path.join(legacy_result_folder, "search")
Expand All @@ -141,10 +154,18 @@ def run_build_and_search(
if search_threads:
cmd = cmd + ["--threads=%s" % search_threads]

cmd = cmd + [temp_conf_filepath]
subprocess.run(cmd, check=True)

os.remove(temp_conf_filepath)
cmd = cmd + [temp_conf_filename]
if dry_run:
print(
"Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd))
)
else:
try:
subprocess.run(cmd, check=True)
except Exception as e:
print("Error occurred running benchmark: %s" % e)
finally:
os.remove(temp_conf_filename)


def main():
Expand Down Expand Up @@ -253,13 +274,24 @@ def main():
"--search-threads",
help="specify the number threads to use for throughput benchmark."
" Single value or a pair of min and max separated by ':'. "
"Example --threads=1:4. Power of 2 values between 'min' "
"Example: --search-threads=1:4. Power of 2 values between 'min' "
"and 'max' will be used. If only 'min' is specified, then a "
"single test is run with 'min' threads. By default min=1, "
"max=<num hyper threads>.",
default=None,
)

parser.add_argument(
"-r",
"--dry-run",
help="dry-run mode will convert the yaml config for the specified "
"algorithms and datasets to the json format that's consumed "
"by the lower-level c++ binaries and then print the command "
"to run execute the benchmarks but will not actually execute "
"the command.",
action="store_true",
)

args = parser.parse_args()

# If both build and search are not provided,
Expand All @@ -271,6 +303,8 @@ def main():
build = args.build
search = args.search

dry_run = args.dry_run

mode = args.search_mode
k = args.count
batch_size = args.batch_size
Expand Down Expand Up @@ -452,13 +486,14 @@ def add_algo_group(group_list):

run_build_and_search(
conf_file,
f"{args.dataset}.json",
f"{args.dataset}",
conf_filedir,
executables_to_run,
args.dataset_path,
args.force,
build,
search,
dry_run,
k,
batch_size,
args.search_threads,
Expand Down

0 comments on commit 6e5cda1

Please sign in to comment.