
Commit

add more fields to data exporter
divyegala committed Nov 7, 2023
1 parent c7aa826 commit b5ad4f8
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py
@@ -20,6 +20,15 @@

import pandas as pd

skip_build_cols = set([
    "algo_name", "index_name", "time", "name", "family_index",
    "per_family_instance_index", "run_name", "run_type", "repetitions",
    "repetition_index", "iterations", "real_time", "cpu_time", "time_unit",
    "index_size"])

skip_search_cols = set([
    "recall", "qps", "items_per_second", "Recall"
]) | skip_build_cols

def read_file(dataset, dataset_path, method):
    dir = os.path.join(dataset_path, dataset, "result", method)
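
The two skip sets above drive the copy-through loops added in the hunks below: any column in the benchmark DataFrame that is not Google Benchmark bookkeeping (run names, repetition counters, timings) and not an already-exported metric is carried into the CSV as an extra field. A minimal sketch of that filtering, assuming a toy benchmark row with a hypothetical graph_degree build parameter:

import pandas as pd

# Stand-in for the skip_build_cols set defined above (subset shown here).
skip_build_cols = {"name", "real_time", "cpu_time", "time_unit", "index_size"}

# Hypothetical benchmark row: graph_degree is an algorithm-specific field.
df = pd.DataFrame({
    "name": ["raft_cagra.graph_degree32/0"],
    "real_time": [12.3],
    "cpu_time": [12.1],
    "graph_degree": [32],
})

write = pd.DataFrame({"index_name": df["name"], "time": df["real_time"]})
for name in df:                      # iterating a DataFrame yields column names
    if name not in skip_build_cols:  # keep only the non-bookkeeping extras
        write[name] = df[name]
# write now carries graph_degree as an extra column next to index_name and time
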
@@ -42,6 +51,9 @@ def convert_json_to_csv_build(dataset, dataset_path):
"time": df["real_time"],
}
)
for name in df:
if name not in skip_build_cols:
write[name] = df[name]
filepath = os.path.normpath(file).split(os.sep)
filename = filepath[-1].split("-")[0] + ".csv"
write.to_csv(
@@ -52,6 +64,9 @@ def convert_json_to_csv_build(dataset, dataset_path):

def convert_json_to_csv_search(dataset, dataset_path):
    for file, algo_name, df in read_file(dataset, dataset_path, "search"):
        build_file = os.path.join(
            dataset_path, dataset, "result", "build", f"{algo_name}.csv"
        )
        algo_name = algo_name.replace("_base", "")
        df["name"] = df["name"].str.split("/").str[0]
        write = pd.DataFrame(
@@ -62,6 +77,28 @@ def convert_json_to_csv_search(dataset, dataset_path):
"qps": df["items_per_second"],
}
)
for name in df:
if name not in skip_search_cols:
write[name] = df[name]
print(build_file)
if os.path.exists(
build_file
):
with open(build_file, "r") as f:
build_df = pd.read_csv(build_file)
write_n_cols = len(write.columns)
write["build GPU"] = None
write["build threads"] = None
for col_idx in range(5, len(build_df.columns)):
col_name = build_df.columns[col_idx]
write[col_name] = None
print(write.head())
for s_index, search_row in write.iterrows():
for b_index, build_row in build_df.iterrows():
if search_row["index_name"] == build_row["index_name"]:
write.iloc[s_index, write_n_cols:] = build_df.iloc[b_index, 3:]
break

write.to_csv(file.replace(".json", ".csv"), index=False)


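The nested iterrows loop in the last hunk effectively left-joins each search row onto its build-time counterpart by index_name, so the build columns (GPU, threads, and any extra build parameters) also appear in the exported search CSV. A rough pandas-merge equivalent of that join, offered as a sketch rather than the committed implementation (attach_build_fields is a hypothetical helper name, and the renaming to "build GPU" / "build threads" is omitted for brevity):

import pandas as pd

def attach_build_fields(search_df, build_csv_path):
    # Hypothetical helper: append the build-time columns to every search row
    # that shares an index_name with the build CSV written above.
    build_df = pd.read_csv(build_csv_path)
    # Columns 0-2 of the build CSV are algo_name / index_name / time; the rest
    # mirror build_df.iloc[:, 3:] in the commit (GPU, threads, build params).
    extra = build_df.iloc[:, 3:].copy()
    extra["index_name"] = build_df["index_name"]
    return search_df.merge(extra, on="index_name", how="left")

A vectorized join like this avoids the row-by-row scan; at typical benchmark-result sizes, the explicit loop in the commit is equally workable.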

