diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index dd338c0c45..9d5fbb1675 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -20,6 +20,15 @@ import pandas as pd +skip_build_cols = set([ + "algo_name", "index_name", "time", "name", "family_index", + "per_family_instance_index", "run_name", "run_type", "repetitions", + "repetition_index", "iterations", "real_time", "cpu_time", "time_unit", + "index_size"]) + +skip_search_cols = set([ + "recall", "qps", "items_per_second", "Recall" +]) | skip_build_cols def read_file(dataset, dataset_path, method): dir = os.path.join(dataset_path, dataset, "result", method) @@ -42,6 +51,9 @@ def convert_json_to_csv_build(dataset, dataset_path): "time": df["real_time"], } ) + for name in df: + if name not in skip_build_cols: + write[name] = df[name] filepath = os.path.normpath(file).split(os.sep) filename = filepath[-1].split("-")[0] + ".csv" write.to_csv( @@ -52,6 +64,9 @@ def convert_json_to_csv_build(dataset, dataset_path): def convert_json_to_csv_search(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "search"): + build_file = os.path.join( + dataset_path, dataset, "result", "build", f"{algo_name}.csv" + ) algo_name = algo_name.replace("_base", "") df["name"] = df["name"].str.split("/").str[0] write = pd.DataFrame( @@ -62,6 +77,28 @@ def convert_json_to_csv_search(dataset, dataset_path): "qps": df["items_per_second"], } ) + for name in df: + if name not in skip_search_cols: + write[name] = df[name] + print(build_file) + if os.path.exists( + build_file + ): + with open(build_file, "r") as f: + build_df = pd.read_csv(build_file) + write_n_cols = len(write.columns) + write["build GPU"] = None + write["build threads"] = None + for col_idx in range(5, len(build_df.columns)): + col_name = build_df.columns[col_idx] + write[col_name] = None + print(write.head()) + for s_index, search_row in write.iterrows(): + for b_index, build_row in build_df.iterrows(): + if search_row["index_name"] == build_row["index_name"]: + write.iloc[s_index, write_n_cols:] = build_df.iloc[b_index, 3:] + break + write.to_csv(file.replace(".json", ".csv"), index=False)