Skip to content

Commit

Permalink
Merge branch 'branch-23.12' into ann_bench_throughput_stride
Browse files Browse the repository at this point in the history
  • Loading branch information
tfeher authored Nov 9, 2023
2 parents f42b2af + 061c0cf commit c0a161f
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 65 deletions.
146 changes: 83 additions & 63 deletions python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import argparse
import json
import os
import sys
import traceback
import warnings

import pandas as pd
Expand Down Expand Up @@ -57,74 +59,89 @@ def read_file(dataset, dataset_path, method):

def convert_json_to_csv_build(dataset, dataset_path):
    """Export benchmark *build* results for ``dataset`` from JSON to CSV.

    For each build-result file yielded by ``read_file`` the Google-Benchmark
    dataframe is reshaped into ``algo_name`` / ``index_name`` / ``time``
    columns, plus every remaining benchmark column not listed in
    ``skip_build_cols``, and written as ``<prefix>.csv`` next to the source
    JSON.  A failure while processing one file is reported and skipped so the
    remaining files are still exported.

    Parameters
    ----------
    dataset : str
        Name of the dataset whose build results should be converted.
    dataset_path : str
        Root directory containing the per-dataset result folders.
    """
    for file, algo_name, df in read_file(dataset, dataset_path, "build"):
        try:
            # "_base" is an executable-variant suffix, not part of the
            # algorithm name users expect to see.
            algo_name = algo_name.replace("_base", "")
            # Benchmark names look like "<index>/<params>"; keep only the
            # index part.
            df["name"] = df["name"].str.split("/").str[0]
            write = pd.DataFrame(
                {
                    "algo_name": [algo_name] * len(df),
                    "index_name": df["name"],
                    "time": df["real_time"],
                }
            )
            # Carry over every remaining benchmark column that is not noise.
            for name in df:
                if name not in skip_build_cols:
                    write[name] = df[name]
            filepath = os.path.normpath(file).split(os.sep)
            # "<name>-<suffix>.json" -> "<name>.csv" in the same directory.
            filename = filepath[-1].split("-")[0] + ".csv"
            write.to_csv(
                os.path.join(f"{os.sep}".join(filepath[:-1]), filename),
                index=False,
            )
        except Exception as e:
            # Best-effort export: report the failure and continue with the
            # next result file instead of aborting the whole run.
            print(
                "An error occurred processing file %s (%s). Skipping..."
                % (file, e)
            )
            traceback.print_exc()


def convert_json_to_csv_search(dataset, dataset_path):
    """Export benchmark *search* results for ``dataset`` from JSON to CSV.

    Each search-result dataframe from ``read_file`` is reshaped into
    ``algo_name`` / ``index_name`` / ``recall`` / ``qps`` columns plus any
    extra benchmark columns not in ``skip_search_cols``.  When the matching
    build CSV (produced by ``convert_json_to_csv_build``) exists, its
    per-index build parameters are joined onto the search rows by
    ``index_name``; otherwise a warning is emitted.  Failures on individual
    files are reported and skipped.

    Parameters
    ----------
    dataset : str
        Name of the dataset whose search results should be converted.
    dataset_path : str
        Root directory containing the per-dataset result folders.
    """
    for file, algo_name, df in read_file(dataset, dataset_path, "search"):
        try:
            # NOTE: the build CSV is looked up with the *original* algo name
            # (before the "_base" suffix is stripped below).
            build_file = os.path.join(
                dataset_path, dataset, "result", "build", f"{algo_name}.csv"
            )
            algo_name = algo_name.replace("_base", "")
            # Benchmark names look like "<index>/<params>"; keep the index.
            df["name"] = df["name"].str.split("/").str[0]
            write = pd.DataFrame(
                {
                    "algo_name": [algo_name] * len(df),
                    "index_name": df["name"],
                    "recall": df["Recall"],
                    "qps": df["items_per_second"],
                }
            )
            # Carry over every remaining benchmark column that is not noise.
            for name in df:
                if name not in skip_search_cols:
                    write[name] = df[name]

            if os.path.exists(build_file):
                build_df = pd.read_csv(build_file)
                write_ncols = len(write.columns)
                # Fixed build columns, filled per-row from the build CSV.
                write["build time"] = None
                write["build threads"] = None
                write["build cpu_time"] = None
                write["build GPU"] = None

                # Any build-CSV columns past the first five are extra build
                # parameters; create empty columns for them as well.
                for col_idx in range(5, len(build_df.columns)):
                    col_name = build_df.columns[col_idx]
                    write[col_name] = None

                # Join build info onto each search row by index_name
                # (first match wins).
                for s_index, search_row in write.iterrows():
                    for b_index, build_row in build_df.iterrows():
                        if search_row["index_name"] == build_row["index_name"]:
                            write.iloc[s_index, write_ncols] = build_df.iloc[
                                b_index, 2
                            ]
                            write.iloc[
                                s_index, write_ncols + 1 :
                            ] = build_df.iloc[b_index, 3:]
                            break
            else:
                # No build CSV: still export the search results, just without
                # the appended build parameters.
                warnings.warn(
                    f"Build CSV not found for {algo_name}, "
                    f"build params won't be "
                    "appended in the Search CSV"
                )

            write.to_csv(file.replace(".json", ".csv"), index=False)
        except Exception as e:
            # Best-effort export: report the failure and continue with the
            # next result file instead of aborting the whole run.
            print(
                "An error occurred processing file %s (%s). Skipping..."
                % (file, e)
            )
            traceback.print_exc()


def main():
Expand All @@ -147,6 +164,9 @@ def main():
default=default_dataset_path,
)

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()

convert_json_to_csv_build(args.dataset, args.dataset_path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#
import argparse
import os
import sys

import cupy as cp
import numpy as np
Expand Down Expand Up @@ -178,6 +179,9 @@ def main():
" commonly used with RAFT ANN are 'sqeuclidean' and 'inner_product'",
)

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()

if args.rows is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import argparse
import os
import subprocess
import sys
from urllib.request import urlretrieve


Expand Down Expand Up @@ -101,6 +102,10 @@ def main():
help="normalize cosine distance to inner product",
action="store_true",
)

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()

download(args.dataset, args.normalize, args.dataset_path)
Expand Down
4 changes: 4 additions & 0 deletions python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import argparse
import itertools
import os
import sys
from collections import OrderedDict

import matplotlib as mpl
Expand Down Expand Up @@ -486,6 +487,9 @@ def main():
action="store_true",
)

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()

if args.algorithms:
Expand Down
17 changes: 15 additions & 2 deletions python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
import json
import os
import subprocess
import sys
import uuid
import warnings
from importlib import import_module

import yaml
Expand Down Expand Up @@ -130,7 +132,8 @@ def run_build_and_search(
except Exception as e:
print("Error occurred running benchmark: %s" % e)
finally:
os.remove(temp_conf_filename)
if not search:
os.remove(temp_conf_filename)

if search:
search_folder = os.path.join(legacy_result_folder, "search")
Expand Down Expand Up @@ -292,6 +295,9 @@ def main():
action="store_true",
)

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()

# If both build and search are not provided,
Expand Down Expand Up @@ -368,7 +374,14 @@ def main():
algos_conf = dict()
for algo_f in algos_conf_fs:
with open(algo_f, "r") as f:
algo = yaml.safe_load(f)
try:
algo = yaml.safe_load(f)
except Exception as e:
warnings.warn(
f"Could not load YAML config {algo_f} due to "
+ e.with_traceback()
)
continue
insert_algo = True
insert_algo_group = False
if filter_algos:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import argparse
import os
import subprocess
import sys


def split_groundtruth(groundtruth_filepath):
Expand Down Expand Up @@ -43,6 +44,10 @@ def main():
help="Path to billion-scale dataset groundtruth file",
required=True,
)

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()

split_groundtruth(args.groundtruth)
Expand Down

0 comments on commit c0a161f

Please sign in to comment.