update result analyzer script (#5920)
zpcore authored Nov 23, 2023
1 parent fe7b007 commit 037bbd6
Showing 1 changed file with 137 additions and 44 deletions.
181 changes: 137 additions & 44 deletions benchmarks/result_analyzer.py
@@ -1,18 +1,10 @@
import argparse
from collections import OrderedDict
import copy
import csv
import io
import json
import logging
import numpy as np
import os
import pandas as pd
import subprocess
import sys
import time
import torch
from tqdm import tqdm

logger = logging.getLogger(__name__)

@@ -25,11 +17,30 @@ def __init__(self, args):
self.output_dir = os.path.abspath(self._args.output_dirname)
if not os.path.exists(self.output_dir):
raise ValueError("The output directory does not exist.")
self.output_file = os.path.join(self.output_dir, "metric_report.csv")
self.output_file = os.path.join(self.output_dir,
"metric_report." + self._args.output_format)

self.database = os.path.abspath(self._args.database)

def run(self):
def run_jsonl(self):
jsonl_files = []
for file in os.listdir(self.output_dir):
if file.endswith(".jsonl"):
jsonl_files.append(os.path.join(self.output_dir, file))

all_test_runs = []
for file in jsonl_files:
test_runs = self.extract_metrics_jsonl(file)
all_test_runs.extend(test_runs)

with open(self.output_file, 'w+') as f:
for test_run in all_test_runs:
f.write(json.dumps(test_run))
f.write("\n")

print(f"Saving report to: {self.output_file}")

def run_csv(self):
jsonl_files = []
for file in os.listdir(self.output_dir):
if file.endswith(".jsonl"):
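
As an aside (not part of this commit): the JSONL report written by run_jsonl above contains one JSON object per line, each with a "metrics" and a "dimensions" sub-dict as assembled by extract_metrics_jsonl later in this diff. A minimal sketch of reading the report back, assuming it was written to metric_report.jsonl in the current directory:

import json

with open("metric_report.jsonl", mode="r", encoding="utf-8") as f:
    for line in f:
        record = json.loads(line)
        # Each record carries the run's dimensions and its computed metrics.
        print(record["dimensions"]["model_name"],
              record["metrics"].get("median_total_time"))
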
@@ -58,52 +69,116 @@ def run(self):
"outputs_file": pd.Series(dtype="str"),
})
for file in jsonl_files:
metric_df = self.extract_metrics(file, metric_df)

metric_df = self.extract_metrics_csv(file, metric_df)
# additional processing of the metric_df can be done here

self.export_metric_report(metric_df)

def extract_metrics(self, file, metric_df):
def get_calculated_metrics(self, d, dataline):
total_time = np.asarray(dataline["metrics"]["total_time"], dtype="float")
d["median_total_time"] = np.median(total_time)
per_iter_time = np.asarray(
dataline["metrics"]["per_iter_time"], dtype="float")
d["median_per_iter_time"] = np.median(per_iter_time)
if dataline["experiment"]["xla"]:
trace_per_iter_time = np.asarray(
dataline["metrics"]["trace_per_iter_time"], dtype="float")
d["xla_median_trace_per_iter_time"] = np.median(trace_per_iter_time)
d["xla_compile_time"] = np.max(total_time) - np.median(total_time)
else:
d["xla_median_trace_per_iter_time"] = -1
d["xla_compile_time"] = -1

if dataline["experiment"]["dynamo"]:
d["dynamo_compile_time"] = np.max(total_time) - np.median(total_time)
else:
d["dynamo_compile_time"] = -1
return d

# TODO: handle error message properly (database length restriction)
def extract_metrics_jsonl(self, file):
with open(file, mode="r", encoding="utf-8") as f:
jsonlines = f.read().splitlines()
runs = []
for jsonline in jsonlines:
dataline = json.loads(jsonline)
batch_size = dataline["experiment"]["batch_size"]
      batch_size_value = -1 if batch_size is None else batch_size
xla = dataline["experiment"]["xla"]
xla_value = "None" if xla is None else xla
dynamo = dataline["experiment"]["dynamo"]
dynamo_value = "None" if dynamo is None else dynamo
test = dataline["experiment"]["test"]
test_value = "None" if test is None else test
outputs_file = dataline["experiment"].get("outputs_file", None)
outputs_file_value = "None" if outputs_file is None else outputs_file

d = {
"metrics": {
"timestamp": int(self.timestamp),
"batch_size": batch_side_value,
"repeat": dataline["repeat"],
"iterations_per_run": dataline["iterations_per_run"]
},
"dimensions": {
"suite_name": dataline["model"]["suite_name"],
"model_name": dataline["model"]["model_name"],
"experiment_name": dataline["experiment"]["experiment_name"],
"accelerator": dataline["experiment"]["accelerator_model"],
"accelerator_model": dataline["experiment"]["accelerator_model"],
"xla": xla_value,
"dynamo": dynamo_value,
"test": test_value,
"outputs_file": outputs_file_value
}
}

if "error" in dataline["metrics"] and not self._args.hide_errors:
d["error_message"] = dataline["metrics"]["error"]

if "error" not in dataline["metrics"]:
d["dimensions"]["run_status"] = "success"
d["metrics"] = self.get_calculated_metrics(d["metrics"], dataline)
else:
d["dimensions"]["run_status"] = "failure"
d["metrics"]["median_total_time"] = -1
d["metrics"]["median_per_iter_time"] = -1
d["metrics"]["xla_median_trace_per_iter_time"] = -1
d["metrics"]["xla_compile_time"] = -1
d["metrics"]["dynamo_compile_time"] = -1

runs.append(d)

return runs

def extract_metrics_csv(self, file, metric_df):
with open(file, mode="r", encoding="utf-8") as f:
jsonlines = f.read().splitlines()

for jsonline in jsonlines:
tmp = json.loads(jsonline)
dataline = json.loads(jsonline)
d = {
"timestamp": self.timestamp,
"suite_name": tmp["model"]["suite_name"],
"model_name": tmp["model"]["model_name"],
"experiment_name": tmp["experiment"]["experiment_name"],
"accelerator": tmp["experiment"]["accelerator"],
"accelerator_model": tmp["experiment"]["accelerator_model"],
"xla": tmp["experiment"]["xla"],
"xla_flags": tmp["experiment"]["xla_flags"],
"dynamo": tmp["experiment"]["dynamo"],
"test": tmp["experiment"]["test"],
"batch_size": tmp["experiment"]["batch_size"],
"repeat": tmp["repeat"],
"iterations_per_run": tmp["iterations_per_run"],
"suite_name": dataline["model"]["suite_name"],
"model_name": dataline["model"]["model_name"],
"experiment_name": dataline["experiment"]["experiment_name"],
"accelerator": dataline["experiment"]["accelerator"],
"accelerator_model": dataline["experiment"]["accelerator_model"],
"xla": dataline["experiment"]["xla"],
"xla_flags": dataline["experiment"]["xla_flags"],
"dynamo": dataline["experiment"]["dynamo"],
"test": dataline["experiment"]["test"],
"batch_size": dataline["experiment"]["batch_size"],
"repeat": dataline["repeat"],
"iterations_per_run": dataline["iterations_per_run"],
"error_message": None,
"outputs_file": tmp["outputs_file"],
"outputs_file": dataline["outputs_file"],
}

if "error" in tmp["metrics"] and not self._args.hide_errors:
d["error_message"] = tmp["metrics"]["error"]

if "error" not in tmp["metrics"]:
total_time = np.asarray(tmp["metrics"]["total_time"], dtype="float")
d["median_total_time"] = np.median(total_time)
per_iter_time = np.asarray(
tmp["metrics"]["per_iter_time"], dtype="float")
d["median_per_iter_time"] = np.median(per_iter_time)
if tmp["experiment"]["xla"]:
trace_per_iter_time = np.asarray(
tmp["metrics"]["trace_per_iter_time"], dtype="float")
d["xla_median_trace_per_iter_time"] = np.median(trace_per_iter_time)
d["xla_compile_time"] = np.max(total_time) - np.median(total_time)
if tmp["experiment"]["dynamo"]:
d["dynamo_compile_time"] = np.max(total_time) - np.median(total_time)
if "error" in dataline["metrics"] and not self._args.hide_errors:
d["error_message"] = dataline["metrics"]["error"]

if "error" not in dataline["metrics"]:
d = self.get_calculated_metrics(d, dataline)

new_row = pd.Series(d)
new_row.fillna(value=np.nan, inplace=True)
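
As an aside (not part of this commit): the compile-time figures produced by get_calculated_metrics above are a simple heuristic, namely the gap between the slowest run and the median run, on the assumption that the slowest run is the one that paid the compilation cost. A quick worked example with made-up timings:

import numpy as np

# Hypothetical timings: the first run includes compilation, the rest are steady state.
total_time = np.asarray([12.0, 1.1, 1.0, 1.2, 1.1], dtype="float")
median_total_time = np.median(total_time)               # 1.1
compile_time = np.max(total_time) - median_total_time   # 12.0 - 1.1 = 10.9
print(median_total_time, compile_time)
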
@@ -123,20 +198,38 @@ def export_metric_report(self, metric_df):
metric_df.to_csv(
self.database, mode="a", encoding="utf-8", header=False, index=False)

def run(self):
if self._args.output_format == "jsonl":
self.run_jsonl()
elif self._args.output_format == "csv":
self.run_csv()
else:
raise ValueError(f"Unsupported output format: {self._args.output_format}")


def parse_args(args=None):
parser = argparse.ArgumentParser()

parser.add_argument(
"--output-format",
default="jsonl",
type=str,
choices=["jsonl", "csv"],
help="Specify the output format.",
)

parser.add_argument(
"--log-level",
default="warning",
type=str,
choices=["info", "warning"],
help="Specify the logging level.",
)

parser.add_argument(
"--experiment-name",
default="run_all",
type=str,
choices=["run_all"],
help="Experiment name to run.",
)
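
Finally, a minimal sketch of driving the updated analyzer end to end. This is illustrative only: the enclosing class name (assumed here to be ResultAnalyzer) and the --output-dirname and --database flags are not visible in this diff and are inferred from the attribute accesses above.

# Hypothetical usage; only --output-format, --log-level and --experiment-name
# appear in the diff, the remaining flag names are assumptions.
args = parse_args([
    "--output-format", "csv",
    "--output-dirname", "./results",
    "--database", "./results/database.csv",
])
analyzer = ResultAnalyzer(args)  # class name assumed; only its methods appear above
analyzer.run()                   # dispatches to run_csv() or run_jsonl() per --output-format
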
