diff --git a/.circleci/common.sh b/.circleci/common.sh index 781298ec168..4ffc01c69dd 100755 --- a/.circleci/common.sh +++ b/.circleci/common.sh @@ -70,6 +70,7 @@ function install_deps_pytorch_xla() { pip install cloud-tpu-client pip install absl-py pip install pandas + pip install tabulate pip install --upgrade "numpy>=1.18.5" pip install --upgrade numba diff --git a/.circleci/docker/install_conda.sh b/.circleci/docker/install_conda.sh index 4e5ab24959a..10b2cd0402f 100644 --- a/.circleci/docker/install_conda.sh +++ b/.circleci/docker/install_conda.sh @@ -43,6 +43,7 @@ function install_and_setup_conda() { /usr/bin/yes | pip install expecttest==0.1.3 /usr/bin/yes | pip install absl-py /usr/bin/yes | pip install pandas + /usr/bin/yes | pip install tabulate # Additional PyTorch requirements /usr/bin/yes | pip install scikit-image scipy==1.6.3 /usr/bin/yes | pip install boto3==1.16.34 diff --git a/.kokoro/common.sh b/.kokoro/common.sh index 0a00026de4b..0a556cf0edf 100644 --- a/.kokoro/common.sh +++ b/.kokoro/common.sh @@ -60,6 +60,7 @@ function install_deps_pytorch_xla() { pip install cloud-tpu-client pip install absl-py pip install pandas + pip install tabulate pip install --upgrade "numpy>=1.18.5" pip install --upgrade numba diff --git a/benchmarks/aggregate.py b/benchmarks/aggregate.py index 83850f8a533..43fcf581afb 100644 --- a/benchmarks/aggregate.py +++ b/benchmarks/aggregate.py @@ -11,6 +11,7 @@ import sys import tiers import itertools +from tabulate import tabulate from typing import Any, Dict, List, NamedTuple import numpy as np from scipy.stats.mstats import gmean @@ -277,9 +278,45 @@ def get_pr_titles(args): return [titles, data_labels] +def speedup_header(title: str, backend_name: str, args): + if args.format == 'tab': + return f'Speedup\n{title}\nover\n{args.baseline.capitalize()}\n{backend_name}' + return f'Speedup({title}/{args.baseline.capitalize()} {backend_name})' + + +def modelname_header(model: str, args): + if args.format == 'tab': + return f'ModelName\n{model}' + return f'ModelName({model})' + + +def percentile_header(title: str, p: str, args): + if args.format == 'tab': + return f'{title}\n{p}' + return f'{title} {p}' + + +def pr_text(headers, rows, args): + if args.format == 'csv': + if headers: + headers[0] = f'# {headers[0]}' + print(','.join(headers)) + for row in rows: + print(','.join([str(f) if f is not None else '' for f in row])) + elif args.format == 'tab': + print( + tabulate(rows, headers=headers, tablefmt='fancy_grid', floatfmt='.2f')) + + def pr_latest(results_map: Dict[str, Any], args, timestamps: List[str]): titles, data_labels = get_pr_titles(args) + # speedups[backend] is the list of speedups vs. the baseline for that backend. + # Speedups are sorted in ascending order so that speedups[backend] is + # monotonically increasing. That is, due to the sorting, it is unlikely that + # speedups["foo"][i] and speedups["bar"][i] will correspond to the same model. speedups = [[] for _ in titles] + # model_names[backend][i] contains the model name corresponding to + # speedups[backend][i]. model_names = [[] for _ in titles] base_backend_name = _title_map[args.backends[0]] @@ -297,13 +334,20 @@ def pr_latest(results_map: Dict[str, Any], args, timestamps: List[str]): logger.warning(f'cannot find data for accelerator {args.accelerator}') return - if args.format == 'csv': - print(','.join(['# Workload'] + [ - f'Speedup({title}/{args.baseline.capitalize()} {base_backend_name}),StdDev,ModelName({title})' - for title in titles - ])) - # Note: the latest timestamp might not have results for all benchmarks. - max_len = max([len(l) for l in speedups]) + if args.format == 'csv' or args.format == 'tab': + headers = ['Workload'] + [ + header for title in titles for header in [ + speedup_header(title, base_backend_name, args), 'StdDev', + modelname_header(title, args) + ] + ] + + # Not all models run in all backends, so it is likely that + # len(speedups["foo"]) != len(speedups["bar"]). We therefore pad the speedup + # lists with "None" elements so that we have a "full table", i.e. all lists + # have the same length. This makes it trivial to generate correct CSV or + # tabular output. + num_rows = max([len(l) for l in speedups]) def pad_array(arr, desired_len, val): if len(arr) >= desired_len: @@ -311,16 +355,16 @@ def pad_array(arr, desired_len, val): arr += [val] * (desired_len - len(arr)) for i in range(len(titles)): - pad_array(speedups[i], max_len, Datapoint('', '')) - pad_array(model_names[i], max_len, '') - - for j in range(max_len): - print(','.join( - map(str, [j] + [ - v for i in range(len(titles)) - for v in (speedups[i][j].avg, speedups[i][j].std, - model_names[i][j]) - ]))) + pad_array(speedups[i], num_rows, Datapoint(None, None)) + pad_array(model_names[i], num_rows, None) + + rows = [] + for j in range(num_rows): + rows += [[j] + [ + v for i in range(len(titles)) + for v in (speedups[i][j].avg, speedups[i][j].std, model_names[i][j]) + ]] + pr_text(headers, rows, args) else: plt.figure(figsize=(args.fig_width, args.fig_height)) plt.axhline(y=1.0, color='lightgray') @@ -363,24 +407,24 @@ def pr_histogram(results_map: Dict[str, Any], args, timestamps: List[str]): titles, data_labels = get_pr_titles(args) percentiles = [f'p{p}' for p in (95, 50, 5)] labels = [f'{pfx}:speedups:{p}' for pfx in data_labels for p in percentiles] - full_titles = [f'{title} {p}' for title in titles for p in percentiles] + full_titles = [ + percentile_header(title, p, args) for title in titles for p in percentiles + ] base_backend_name = _title_map[args.backends[0]] x = [] y = [[] for i in range(len(labels))] for timestamp in timestamps: if labels[0] in results_map[timestamp]: - for label in labels: - assert label in results_map[timestamp] x.append(datetime.utcfromtimestamp(float(timestamp))) for i, label in enumerate(labels): - y[i].append( - pr_round(results_map[timestamp][label] if label in - results_map[timestamp] else Datapoint('', '')).avg) - if args.format == 'csv': - full_titles = ['# Datetime(UTC)'] + full_titles - print(','.join(full_titles)) + y[i].append((pr_round(results_map[timestamp][label]) if label + in results_map[timestamp] else Datapoint(None, None)).avg) + if args.format == 'csv' or args.format == 'tab': + headers = ['Datetime(UTC)'] + full_titles + rows = [] for j, utc in enumerate(x): - print(','.join([str(utc)] + [str(y[i][j]) for i in range(len(labels))])) + rows += [[utc] + [y[i][j] for i in range(len(labels))]] + pr_text(headers, rows, args) else: fig, ax = plt.subplots(figsize=(args.fig_width, args.fig_height)) ax.axhline(y=1.0, color='lightgray') @@ -420,17 +464,19 @@ def pr_gmean(results_map: Dict[str, Any], args, timestamps: List[str]): for i, label in enumerate(labels): y[i].append( pr_round(results_map[timestamp][label]) if label in - results_map[timestamp] else Datapoint('', '')) - if args.format == 'csv': - print(','.join(['# Datetime(UTC)'] + [ - f"Speedup({title}/{args.baseline.capitalize()} {base_backend_name}),StdDev" - for title in titles - ])) + results_map[timestamp] else Datapoint(None, None)) + if args.format == 'csv' or args.format == 'tab': + headers = ['Datetime(UTC)'] + [ + header for title in titles for header in + [speedup_header(title, base_backend_name, args), 'StdDev'] + ] + rows = [] for j, x in enumerate(x): - print(','.join( - map(str, [x] + [ - v for i in range(len(labels)) for v in (y[i][j].avg, y[i][j].std) - ]))) + rows += [ + [x] + + [v for i in range(len(labels)) for v in (y[i][j].avg, y[i][j].std)] + ] + pr_text(headers, rows, args) else: fig, ax = plt.subplots(figsize=(args.fig_width, args.fig_height)) ax.axhline(y=1.0, color='lightgray') @@ -458,7 +504,7 @@ def pr_results(results_map: Dict[str, Any], args): timestamps = list(results_map.keys()) timestamps.sort() - if args.format != 'csv' and not has_matplotlib: + if not has_matplotlib and (args.format == 'png' or args.format == 'svg'): sys.exit(f'Fatal: cannot find matplotlib, needed for {args.format} output.') if args.report == 'latest': @@ -532,7 +578,7 @@ def parse_args(args=None): parser.add_argument( "--format", default='csv', - choices=['csv', 'png', 'svg'], + choices=['csv', 'png', 'svg', 'tab'], help='Output format') parser.add_argument('input_file', nargs='+') parser.add_argument( diff --git a/test/benchmarks/v100.inference.histogram.lazytensor.test b/test/benchmarks/v100.inference.histogram.lazytensor.test new file mode 100644 index 00000000000..74bfecd6a5d --- /dev/null +++ b/test/benchmarks/v100.inference.histogram.lazytensor.test @@ -0,0 +1,4 @@ +# ARGS: --backends inductor openxla+lazytensor -- +# Datetime(UTC),Inductor p95,Inductor p50,Inductor p5,XLA+LazyTensor p95,XLA+LazyTensor p50,XLA+LazyTensor p5 +2023-11-11 05:32:18.723407,1.0,1.0,1.0,,, +2023-11-12 05:32:18,1.50833479,1.40761418,1.30689358,0.41071322,0.41071322,0.41071322 diff --git a/test/benchmarks/v100.inference.histogram.lazytensor_tab.test b/test/benchmarks/v100.inference.histogram.lazytensor_tab.test new file mode 100644 index 00000000000..31d1db43836 --- /dev/null +++ b/test/benchmarks/v100.inference.histogram.lazytensor_tab.test @@ -0,0 +1,9 @@ +# ARGS: --backends inductor openxla+lazytensor --format=tab +╒════════════════════════════╤════════════╤════════════╤════════════╤══════════════════╤══════════════════╤══════════════════╕ +│ Datetime(UTC) │ Inductor │ Inductor │ Inductor │ XLA+LazyTensor │ XLA+LazyTensor │ XLA+LazyTensor │ +│ │ p95 │ p50 │ p5 │ p95 │ p50 │ p5 │ +╞════════════════════════════╪════════════╪════════════╪════════════╪══════════════════╪══════════════════╪══════════════════╡ +│ 2023-11-11 05:32:18.723407 │ 1.00 │ 1.00 │ 1.00 │ │ │ │ +├────────────────────────────┼────────────┼────────────┼────────────┼──────────────────┼──────────────────┼──────────────────┤ +│ 2023-11-12 05:32:18 │ 1.51 │ 1.41 │ 1.31 │ 0.41 │ 0.41 │ 0.41 │ +╘════════════════════════════╧════════════╧════════════╧════════════╧══════════════════╧══════════════════╧══════════════════╛ diff --git a/test/benchmarks/v100.inference.histogram.tab.test b/test/benchmarks/v100.inference.histogram.tab.test new file mode 100644 index 00000000000..bd498fb1074 --- /dev/null +++ b/test/benchmarks/v100.inference.histogram.tab.test @@ -0,0 +1,9 @@ +# ARGS: --format=tab +╒════════════════════════════╤════════════╤════════════╤════════════╤══════════════╤══════════════╤══════════════╤═══════════════════╤═══════════════════╤═══════════════════╕ +│ Datetime(UTC) │ Inductor │ Inductor │ Inductor │ XLA+Dynamo │ XLA+Dynamo │ XLA+Dynamo │ XLA_Eval+Dynamo │ XLA_Eval+Dynamo │ XLA_Eval+Dynamo │ +│ │ p95 │ p50 │ p5 │ p95 │ p50 │ p5 │ p95 │ p50 │ p5 │ +╞════════════════════════════╪════════════╪════════════╪════════════╪══════════════╪══════════════╪══════════════╪═══════════════════╪═══════════════════╪═══════════════════╡ +│ 2023-11-11 05:32:18.723407 │ 1.00 │ 1.00 │ 1.00 │ 0.98 │ 0.86 │ 0.74 │ 0.94 │ 0.79 │ 0.65 │ +├────────────────────────────┼────────────┼────────────┼────────────┼──────────────┼──────────────┼──────────────┼───────────────────┼───────────────────┼───────────────────┤ +│ 2023-11-12 05:32:18 │ 1.51 │ 1.41 │ 1.31 │ 1.53 │ 1.17 │ 0.81 │ 1.34 │ 1.05 │ 0.77 │ +╘════════════════════════════╧════════════╧════════════╧════════════╧══════════════╧══════════════╧══════════════╧═══════════════════╧═══════════════════╧═══════════════════╛ diff --git a/test/benchmarks/v100.inference.latest.tab.test b/test/benchmarks/v100.inference.latest.tab.test new file mode 100644 index 00000000000..13af87a79a3 --- /dev/null +++ b/test/benchmarks/v100.inference.latest.tab.test @@ -0,0 +1,12 @@ +# ARGS: --backends inductor openxla+dynamo openxla_eval+dynamo openxla+lazytensor --format=tab +╒════════════╤════════════╤══════════╤════════════════════╤══════════════╤══════════╤════════════════════╤═══════════════════╤══════════╤════════════════════╤══════════════════╤══════════╤════════════════════╕ +│ Workload │ Speedup │ StdDev │ ModelName │ Speedup │ StdDev │ ModelName │ Speedup │ StdDev │ ModelName │ Speedup │ StdDev │ ModelName │ +│ │ Inductor │ │ Inductor │ XLA+Dynamo │ │ XLA+Dynamo │ XLA_Eval+Dynamo │ │ XLA_Eval+Dynamo │ XLA+LazyTensor │ │ XLA+LazyTensor │ +│ │ over │ │ │ over │ │ │ over │ │ │ over │ │ │ +│ │ Oldest │ │ │ Oldest │ │ │ Oldest │ │ │ Oldest │ │ │ +│ │ Inductor │ │ │ Inductor │ │ │ Inductor │ │ │ Inductor │ │ │ +╞════════════╪════════════╪══════════╪════════════════════╪══════════════╪══════════╪════════════════════╪═══════════════════╪══════════╪════════════════════╪══════════════════╪══════════╪════════════════════╡ +│ 0 │ 1.30 │ 0.00 │ Background_Matting │ 0.77 │ 0.00 │ Background_Matting │ 0.73 │ 0.00 │ Background_Matting │ 0.41 │ 0.00 │ Background_Matting │ +├────────────┼────────────┼──────────┼────────────────────┼──────────────┼──────────┼────────────────────┼───────────────────┼──────────┼────────────────────┼──────────────────┼──────────┼────────────────────┤ +│ 1 │ 1.52 │ 0.00 │ BERT_pytorch │ 1.57 │ 0.00 │ BERT_pytorch │ 1.37 │ 0.00 │ BERT_pytorch │ │ │ │ +╘════════════╧════════════╧══════════╧════════════════════╧══════════════╧══════════╧════════════════════╧═══════════════════╧══════════╧════════════════════╧══════════════════╧══════════╧════════════════════╛ diff --git a/test/benchmarks/v100.inference.latest.test b/test/benchmarks/v100.inference.latest.test index 0877cf1c6a7..95a9b95f3c5 100644 --- a/test/benchmarks/v100.inference.latest.test +++ b/test/benchmarks/v100.inference.latest.test @@ -1,3 +1,4 @@ -# Workload,Speedup(Inductor/Oldest Inductor),StdDev,ModelName(Inductor),Speedup(XLA+Dynamo/Oldest Inductor),StdDev,ModelName(XLA+Dynamo),Speedup(XLA_Eval+Dynamo/Oldest Inductor),StdDev,ModelName(XLA_Eval+Dynamo) -0,1.2957024,0.0,Background_Matting,0.77297688,0.0,Background_Matting,0.7341254,0.0,Background_Matting -1,1.51952596,6.914e-05,BERT_pytorch,1.56880282,7.138e-05,BERT_pytorch,1.36859903,6.227e-05,BERT_pytorch +# ARGS: --backends inductor openxla+dynamo openxla_eval+dynamo openxla+lazytensor -- +# Workload,Speedup(Inductor/Oldest Inductor),StdDev,ModelName(Inductor),Speedup(XLA+Dynamo/Oldest Inductor),StdDev,ModelName(XLA+Dynamo),Speedup(XLA_Eval+Dynamo/Oldest Inductor),StdDev,ModelName(XLA_Eval+Dynamo),Speedup(XLA+LazyTensor/Oldest Inductor),StdDev,ModelName(XLA+LazyTensor) +0,1.2957024,0.0,Background_Matting,0.77297688,0.0,Background_Matting,0.7341254,0.0,Background_Matting,0.41071322,0.0,Background_Matting +1,1.51952596,6.914e-05,BERT_pytorch,1.56880282,7.138e-05,BERT_pytorch,1.36859903,6.227e-05,BERT_pytorch,,, diff --git a/test/benchmarks/v100.inference.speedup.lazytensor.test b/test/benchmarks/v100.inference.speedup.lazytensor.test new file mode 100644 index 00000000000..fd286f1313b --- /dev/null +++ b/test/benchmarks/v100.inference.speedup.lazytensor.test @@ -0,0 +1,4 @@ +# ARGS: --backends inductor openxla+lazytensor -- +# Datetime(UTC),Speedup(Inductor/Oldest Inductor),StdDev,Speedup(XLA+LazyTensor/Oldest Inductor),StdDev +2023-11-11 05:32:18.723407,1.0,3.217e-05,, +2023-11-12 05:32:18,1.40315838,3.192e-05,0.41071322,0.0 diff --git a/test/benchmarks/v100.inference.speedup.lazytensor_tab.test b/test/benchmarks/v100.inference.speedup.lazytensor_tab.test new file mode 100644 index 00000000000..8eab2e7d90a --- /dev/null +++ b/test/benchmarks/v100.inference.speedup.lazytensor_tab.test @@ -0,0 +1,12 @@ +# ARGS: --backends inductor openxla+lazytensor --format=tab +╒════════════════════════════╤════════════╤══════════╤══════════════════╤══════════╕ +│ Datetime(UTC) │ Speedup │ StdDev │ Speedup │ StdDev │ +│ │ Inductor │ │ XLA+LazyTensor │ │ +│ │ over │ │ over │ │ +│ │ Oldest │ │ Oldest │ │ +│ │ Inductor │ │ Inductor │ │ +╞════════════════════════════╪════════════╪══════════╪══════════════════╪══════════╡ +│ 2023-11-11 05:32:18.723407 │ 1.00 │ 0.00 │ │ │ +├────────────────────────────┼────────────┼──────────┼──────────────────┼──────────┤ +│ 2023-11-12 05:32:18 │ 1.40 │ 0.00 │ 0.41 │ 0.00 │ +╘════════════════════════════╧════════════╧══════════╧══════════════════╧══════════╛ diff --git a/test/benchmarks/v100.inference.speedup.tab.test b/test/benchmarks/v100.inference.speedup.tab.test new file mode 100644 index 00000000000..84bb368b5bf --- /dev/null +++ b/test/benchmarks/v100.inference.speedup.tab.test @@ -0,0 +1,12 @@ +# ARGS: --format=tab +╒════════════════════════════╤════════════╤══════════╤══════════════╤══════════╤═══════════════════╤══════════╕ +│ Datetime(UTC) │ Speedup │ StdDev │ Speedup │ StdDev │ Speedup │ StdDev │ +│ │ Inductor │ │ XLA+Dynamo │ │ XLA_Eval+Dynamo │ │ +│ │ over │ │ over │ │ over │ │ +│ │ Oldest │ │ Oldest │ │ Oldest │ │ +│ │ Inductor │ │ Inductor │ │ Inductor │ │ +╞════════════════════════════╪════════════╪══════════╪══════════════╪══════════╪═══════════════════╪══════════╡ +│ 2023-11-11 05:32:18.723407 │ 1.00 │ 0.00 │ 0.85 │ 0.00 │ 0.78 │ 0.00 │ +├────────────────────────────┼────────────┼──────────┼──────────────┼──────────┼───────────────────┼──────────┤ +│ 2023-11-12 05:32:18 │ 1.40 │ 0.00 │ 1.10 │ 0.00 │ 1.00 │ 0.00 │ +╘════════════════════════════╧════════════╧══════════╧══════════════╧══════════╧═══════════════════╧══════════╛ diff --git a/test/benchmarks/v100.jsonl b/test/benchmarks/v100.jsonl index 2f78186be2c..38aa717dea6 100644 --- a/test/benchmarks/v100.jsonl +++ b/test/benchmarks/v100.jsonl @@ -15,6 +15,7 @@ {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "train", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.09635835910215974, 0.09635835910215974, 0.09635835910215974], "per_iter_time": [15.150130984999123, 0.09635835910215974, 0.09635835910215974, 0.09635835910215974]}, "outputs_file": null, "timestamp": 1699767138} {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "train", "batch_size": null}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": null, "per_iter_time": null}, "outputs_file": null, "timestamp": 1699767138} {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.05668705682083964, 0.05668705682083964, 0.05668705682083964], "per_iter_time": [15.150130984999123, 0.05668705682083964, 0.05668705682083964, 0.05668705682083964]}, "outputs_file": null, "timestamp": 1699767138} +{"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": null, "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [10.150130984999123, 0.10668705682083964, 0.10668705682083964, 0.10668705682083964], "per_iter_time": [10.150130984999123, 0.10668705682083964, 0.10668705682083964, 0.10668705682083964]}, "outputs_file": null, "timestamp": 1699767138} {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla_eval", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.05968705682083964, 0.05968705682083964, 0.05968705682083964], "per_iter_time": [15.150130984999123, 0.05968705682083964, 0.05968705682083964, 0.05968705682083964]}, "outputs_file": null, "timestamp": 1699767138} {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.033817784171551466, 0.033817784171551466, 0.033817784171551466], "per_iter_time": [15.150130984999123, 0.033817784171551466, 0.033817784171551466, 0.033817784171551466]}, "outputs_file": null, "timestamp": 1699767138} {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "train", "batch_size": 4}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.35498354313895106, 0.35498354313895106, 0.35498354313895106], "per_iter_time": [15.150130984999123, 0.35498354313895106, 0.35498354313895106, 0.35498354313895106]}, "outputs_file": null, "timestamp": 1699767138}