From f2ed93b1683cf4f5c4ca61ae33a143f586c92356 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Tue, 10 Jan 2023 19:29:29 +0000 Subject: [PATCH 01/31] Initial commit with dummy model benchmark --- benchmarks/benchmark_experiment.py | 79 +++++++++++++ benchmarks/benchmark_model.py | 72 ++++++++++++ benchmarks/experiment_runner.py | 183 +++++++++++++++++++++++++++++ benchmarks/utils.py | 54 +++++++++ 4 files changed, 388 insertions(+) create mode 100644 benchmarks/benchmark_experiment.py create mode 100644 benchmarks/benchmark_model.py create mode 100644 benchmarks/experiment_runner.py create mode 100644 benchmarks/utils.py diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py new file mode 100644 index 00000000000..aac538ae645 --- /dev/null +++ b/benchmarks/benchmark_experiment.py @@ -0,0 +1,79 @@ +import logging +import os +import torch +import torch_xla.core.xla_model as xm + +try: + from .utils import is_tpu_available +except ImportError: + from utils import is_tpu_available + +logger = logging.getLogger(__name__) + + +class ExperimentLoader: + + def __init__(self, args): + self._args = args + + def list_experiment_configs(self): + experiment_configs = [] + + accelerators = ["cpu"] + + if torch.cuda.is_available(): + accelerators.append("gpu") + + if is_tpu_available(): + accelerators.append("tpu") + + xla_options = [None, "PJRT"] + tests = ["eval", "train"] + for accelerator in accelerators: + for xla in xla_options: + for test in tests: + experiment_config = {"accelerator": accelerator, "xla": xla, "test": test} + if self._is_valid(experiment_config): + self._add_experiment_env(experiment_config) + experiment_configs.append(experiment_config) + return experiment_configs + + def _is_valid(self, experiment_config): + if experiment_config["accelerator"] == "tpu" and not experiment_config["xla"]: + return False + return True + + def _add_experiment_env(self, experiment_config): + process_env = None + if experiment_config["xla"] == "PJRT": + process_env = os.environ.copy() + process_env["PJRT_DEVICE"] = experiment_config["accelerator"].upper() + experiment_config["process_env"] = process_env + + def load_experiment(self, experiment_config): + accelerator = experiment_config.get("accelerator", "cpu") + xla = experiment_config.get("xla", None) + test = experiment_config.get("test", "eval") + benchmark_experiment = BenchmarkExperiment(accelerator=accelerator, xla=xla, test=test) + + return benchmark_experiment + + +class BenchmarkExperiment: + + def __init__(self, accelerator, xla, test): + self.accelerator = accelerator + self.xla = xla + self.test = test + + def get_device(self): + if self.xla: + device = xm.xla_device(devkind=self.accelerator.upper()) + elif self.accelerator == "cpu": + device = torch.device("cpu") + elif self.accelerator == "gpu": + device = torch.device("cuda") + else: + raise NotImplementedError + + return device \ No newline at end of file diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py new file mode 100644 index 00000000000..a965d65d124 --- /dev/null +++ b/benchmarks/benchmark_model.py @@ -0,0 +1,72 @@ +import logging +import types + + +logger = logging.getLogger(__name__) + + +class ModelLoader: + + def __init__(self, args): + self._args = args + self.suite_name = self._args.suite_name + + def list_model_configs(self): + model_configs = [ + {"model_name": "dummy"}, + ] + + return model_configs + + def is_compatible(self, model_config, experiment_config): + return True + + def load_model(self, model_config, 
benchmark_experiment): + if model_config["model_name"] != "dummy": + raise NotImplementedError + + device = benchmark_experiment.get_device() + + benchmark_model = BenchmarkModel(module=None, example_input=None, optimizer=None, device=device, benchmark_experiment=benchmark_experiment) + + def train(self): + print(self.benchmark_experiment.accelerator, self.benchmark_experiment.xla, self.benchmark_experiment.test) + print(self.device) + if self.benchmark_experiment.xla: + import torch_xla.core.xla_model as xm + print(xm.xla_real_devices([self.device])) + return None + + def eval(self): + print(self.benchmark_experiment.accelerator, self.benchmark_experiment.xla, self.benchmark_experiment.test) + print(self.device) + if self.benchmark_experiment.xla: + import torch_xla.core.xla_model as xm + print(xm.xla_real_devices([self.device])) + return None + + benchmark_model.train = types.MethodType(train, benchmark_model) + benchmark_model.eval = types.MethodType(eval, benchmark_model) + + return benchmark_model + +class TorchBenchModelLoader(ModelLoader): + + def __init__(self, args): + super(TorchBenchModelLoader, self).__init__(args) + + +class BenchmarkModel: + + def __init__(self, module, example_input, optimizer, device, benchmark_experiment): + self.module = module + self.example_input = example_input + self.optimizer = optimizer + self.device = device + self.benchmark_experiment = benchmark_experiment + + def train(self): + return + + def eval(self): + return diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py new file mode 100644 index 00000000000..7550fbbe003 --- /dev/null +++ b/benchmarks/experiment_runner.py @@ -0,0 +1,183 @@ +import argparse +from collections import OrderedDict +import json +import logging +import numpy as np +import subprocess +import sys +import time +import torch +import torch_xla.core.xla_model as xm +import types + +try: + from .benchmark_model import TorchBenchModelLoader, ModelLoader + from .benchmark_experiment import ExperimentLoader + from .utils import patch_torch_manual_seed ,reset_rng_state +except ImportError: + from benchmark_model import TorchBenchModelLoader, ModelLoader + from benchmark_experiment import ExperimentLoader + from utils import patch_torch_manual_seed ,reset_rng_state + +logger = logging.getLogger(__name__) + + +class ExperimentRunner: + + def __init__(self, args): + self._args = args + self.suite_name = self._args.suite_name + + self.experiment_loader = ExperimentLoader(self._args) + + if self.suite_name == "torchbench": + self.model_loader = TorchBenchModelLoader(self._args) + elif self.suite_name == "dummy": + self.model_loader = ModelLoader(self._args) + else: + raise NotImplementedError + + # initialize output directory from args + # self.output_dir + + def run(self): + if self._args.experiment_config and self._args.model_config: + experiment_config = json.loads(self._args.experiment_config) + model_config = json.loads(self._args.model_config) + self.run_single_experiment(experiment_config, model_config) + else: + assert not self._args.experiment_config and not self._args.model_config + + experiment_configs = self.experiment_loader.list_experiment_configs() + model_configs = self.model_loader.list_model_configs() + for model_config in model_configs: + for experiment_config in experiment_configs: + if self.model_loader.is_compatible(model_config, experiment_config): + process_env = experiment_config.pop("process_env") + experiment_config_str = json.dumps(experiment_config) + model_config_str = 
json.dumps(model_config) + try: + subprocess.check_call( + [sys.executable] + sys.argv + + [f"--experiment-config={experiment_config_str}"] + + [f"--model-config={model_config_str}"], + timeout=60 * 20, + env=process_env, + ) + except subprocess.TimeoutExpired: + logger.error("TIMEOUT") + except subprocess.SubprocessError: + logger.error("ERROR") + + else: + logger.warning("SKIP because of incompatible configs.") + + def run_single_experiment(self, experiment_config, model_config): + benchmark_experiment = self.experiment_loader.load_experiment( + experiment_config + ) + benchmark_model = self.model_loader.load_model( + model_config, benchmark_experiment + ) + + if benchmark_experiment.test == "train": + benchmark_model.model_iter_fn = benchmark_model.train + elif benchmark_experiment.test == "eval": + benchmark_model.model_iter_fn = benchmark_model.eval + else: + raise NotImplementedError + + timings = OrderedDict() + results = [] + for i in range(self._args.repeat): + timing, result = self.timed_iteration( + benchmark_experiment, benchmark_model + ) + results.append(result) + for key, val in timing.items(): + if i == 0: + timings[key] = np.zeros(self._args.repeat, np.float64) + timings[key][i] = val + + # save the config, timings and results to proper files in self.output_dir + # self.save_results(timings, results) + + def _maybe_mark_step(self, benchmark_experiment): + if benchmark_experiment.xla: + xm.mark_step() + + def _synchronize(self, benchmark_experiment): + if benchmark_experiment.xla: + xm.wait_device_ops() + elif benchmark_experiment.accelerator == "gpu": + torch.cuda.synchronize() + else: + pass + + def timed_iteration(self, benchmark_experiment, benchmark_model): + reset_rng_state() + + self._maybe_mark_step(benchmark_experiment) + self._synchronize(benchmark_experiment) + + timing = OrderedDict() + t_start = time.perf_counter() + + result = benchmark_model.model_iter_fn() + + if benchmark_experiment.xla: + t_trace = time.perf_counter() + + self._maybe_mark_step(benchmark_experiment) + self._synchronize(benchmark_experiment) + + t_end = time.perf_counter() + + timing["total"] = t_end - t_start + if benchmark_experiment.xla: + timing["trace"] = t_trace - t_start + + return timing, result + + +def parse_args(args=None): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--suite-name", + required=True, + choices=["dummy", "torchbench"], + help="Suite name for the model garden.", + ) + + parser.add_argument( + "--repeat", + type=int, + default=10, + help="Number of times to repeat the iteration.", + ) + + parser.add_argument( + "--experiment-config", + type=str, + help="JSON string of the experiment config dict.", + ) + + parser.add_argument( + "--model-config", + type=str, + help="JSON string of the model config dict.", + ) + + return parser.parse_args(args) + + +def main(): + args = parse_args() + print(args) + runner = ExperimentRunner(args) + runner.run() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/benchmarks/utils.py b/benchmarks/utils.py new file mode 100644 index 00000000000..a9ffb97b0a6 --- /dev/null +++ b/benchmarks/utils.py @@ -0,0 +1,54 @@ +import functools +from multiprocessing import Process, Queue +import numpy as np +import queue +import random +import torch +import traceback + + +@functools.lru_cache(None) +def patch_torch_manual_seed(): + """Make torch manual seed deterministic. 
Helps with accuracy testing.""" + + def deterministic_torch_manual_seed(*args, **kwargs): + from torch._C import default_generator + + seed = 1337 + import torch.cuda + + if not torch.cuda._is_in_bad_fork(): + torch.cuda.manual_seed_all(seed) + return default_generator.manual_seed(seed) + + torch.manual_seed = deterministic_torch_manual_seed + +def reset_rng_state(): + torch.manual_seed(1337) + random.seed(1337) + np.random.seed(1337) + +@functools.lru_cache(maxsize=1) +def is_tpu_available(): + + def _check_tpu(q): + try: + import os + os.environ["PJRT_DEVICE"] = "TPU" + + import torch_xla.core.xla_model as xm + + q.put((xm.xrt_world_size() > 1) or bool(xm.get_xla_supported_devices("TPU"))) + except Exception: + traceback.print_exc() + q.put(None) + + q = Queue() + process = Process(target=_check_tpu, args=(q,)) + process.start() + process.join(60) + try: + return q.get_nowait() + except queue.Empty: + traceback.print_exc() + return False From 5984de6d8033143ead6082ad5f0199e7f23b9e97 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Tue, 10 Jan 2023 22:42:39 +0000 Subject: [PATCH 02/31] add XRT support --- benchmarks/benchmark_experiment.py | 16 ++++++++++++--- benchmarks/experiment_runner.py | 31 ++++++++++++++++++++---------- benchmarks/utils.py | 15 +++++++++++++++ 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index aac538ae645..85f228dba22 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -27,27 +27,37 @@ def list_experiment_configs(self): if is_tpu_available(): accelerators.append("tpu") - xla_options = [None, "PJRT"] + xla_options = [None, "PJRT", "XRT"] tests = ["eval", "train"] for accelerator in accelerators: for xla in xla_options: for test in tests: experiment_config = {"accelerator": accelerator, "xla": xla, "test": test} if self._is_valid(experiment_config): - self._add_experiment_env(experiment_config) + self._add_experiment_env(experiment_config, accelerators) experiment_configs.append(experiment_config) return experiment_configs def _is_valid(self, experiment_config): if experiment_config["accelerator"] == "tpu" and not experiment_config["xla"]: return False + if experiment_config["accelerator"] == "gpu" and experiment_config["xla"] == "PJRT": + return False return True - def _add_experiment_env(self, experiment_config): + def _add_experiment_env(self, experiment_config, accelerators): process_env = None if experiment_config["xla"] == "PJRT": process_env = os.environ.copy() process_env["PJRT_DEVICE"] = experiment_config["accelerator"].upper() + elif experiment_config["xla"] == "XRT": + process_env = os.environ.copy() + if "tpu" in accelerators: + process_env["TPU_NUM_DEVICES"] = "1" + process_env["XRT_TPU_CONFIG"] = "localservice;0;localhost:51011" + elif "gpu" in accelerators: + process_env["GPU_NUM_DEVICES"] = "1" + experiment_config["process_env"] = process_env def load_experiment(self, experiment_config): diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 7550fbbe003..ac78b58860a 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -13,11 +13,11 @@ try: from .benchmark_model import TorchBenchModelLoader, ModelLoader from .benchmark_experiment import ExperimentLoader - from .utils import patch_torch_manual_seed ,reset_rng_state + from .utils import patch_torch_manual_seed, reset_rng_state, move_to_device except ImportError: from benchmark_model import TorchBenchModelLoader, 
ModelLoader from benchmark_experiment import ExperimentLoader - from utils import patch_torch_manual_seed ,reset_rng_state + from utils import patch_torch_manual_seed, reset_rng_state, move_to_device logger = logging.getLogger(__name__) @@ -93,6 +93,7 @@ def run_single_experiment(self, experiment_config, model_config): timing, result = self.timed_iteration( benchmark_experiment, benchmark_model ) + result = move_to_device(result, 'cpu') results.append(result) for key, val in timing.items(): if i == 0: @@ -102,7 +103,7 @@ def run_single_experiment(self, experiment_config, model_config): # save the config, timings and results to proper files in self.output_dir # self.save_results(timings, results) - def _maybe_mark_step(self, benchmark_experiment): + def _mark_step(self, benchmark_experiment): if benchmark_experiment.xla: xm.mark_step() @@ -117,24 +118,27 @@ def _synchronize(self, benchmark_experiment): def timed_iteration(self, benchmark_experiment, benchmark_model): reset_rng_state() - self._maybe_mark_step(benchmark_experiment) + self._mark_step(benchmark_experiment) self._synchronize(benchmark_experiment) timing = OrderedDict() t_start = time.perf_counter() - result = benchmark_model.model_iter_fn() + for i in range(self._args.repeat_inner): + result = benchmark_model.model_iter_fn() - if benchmark_experiment.xla: - t_trace = time.perf_counter() + if benchmark_experiment.xla and self._args.repeat_inner == 1: + t_trace = time.perf_counter() + + self._mark_step(benchmark_experiment) - self._maybe_mark_step(benchmark_experiment) self._synchronize(benchmark_experiment) t_end = time.perf_counter() timing["total"] = t_end - t_start - if benchmark_experiment.xla: + timing["average"] = timing["total"] / self._args.repeat_inner + if benchmark_experiment.xla and self._args.repeat_inner == 1: timing["trace"] = t_trace - t_start return timing, result @@ -154,7 +158,14 @@ def parse_args(args=None): "--repeat", type=int, default=10, - help="Number of times to repeat the iteration.", + help="Number of times to repeat the timed iteration.", + ) + + parser.add_argument( + "--repeat-inner", + type=int, + default=1, + help="Number of times to repeat the model function inside the timed iteration.", ) parser.add_argument( diff --git a/benchmarks/utils.py b/benchmarks/utils.py index a9ffb97b0a6..d8abc544fdb 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -23,11 +23,13 @@ def deterministic_torch_manual_seed(*args, **kwargs): torch.manual_seed = deterministic_torch_manual_seed + def reset_rng_state(): torch.manual_seed(1337) random.seed(1337) np.random.seed(1337) + @functools.lru_cache(maxsize=1) def is_tpu_available(): @@ -52,3 +54,16 @@ def _check_tpu(q): except queue.Empty: traceback.print_exc() return False + + +def move_to_device(item, device): + if isinstance(item, torch.Tensor): + return item.to(device=device) + elif isinstance(item, list): + return [move_to_device(t, device) for t in item] + elif isinstance(item, tuple): + return tuple(move_to_device(t, device) for t in item) + elif isinstance(item, dict): + return dict((k, move_to_device(t, device)) for k, t in item.items()) + else: + return item From 8d0f78e850398e2c45ce8945ddaa6b5b2cbd4240 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Thu, 12 Jan 2023 00:59:44 +0000 Subject: [PATCH 03/31] Add torchbench benchmark models --- benchmarks/benchmark_experiment.py | 12 ++- benchmarks/benchmark_model.py | 146 +++++++++++++++++++++-------- benchmarks/experiment_runner.py | 84 +++++++++++++---- benchmarks/torchbench_model.py | 107 
+++++++++++++++++++++ benchmarks/{utils.py => util.py} | 13 +++ 5 files changed, 300 insertions(+), 62 deletions(-) create mode 100644 benchmarks/torchbench_model.py rename benchmarks/{utils.py => util.py} (88%) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 85f228dba22..2b074a917fd 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -1,12 +1,11 @@ import logging import os import torch -import torch_xla.core.xla_model as xm try: - from .utils import is_tpu_available + from .util import is_tpu_available except ImportError: - from utils import is_tpu_available + from util import is_tpu_available logger = logging.getLogger(__name__) @@ -78,6 +77,7 @@ def __init__(self, accelerator, xla, test): def get_device(self): if self.xla: + import torch_xla.core.xla_model as xm device = xm.xla_device(devkind=self.accelerator.upper()) elif self.accelerator == "cpu": device = torch.device("cpu") @@ -86,4 +86,8 @@ def get_device(self): else: raise NotImplementedError - return device \ No newline at end of file + return device + + @property + def filename_str(self): + return f"{self.accelerator}-{self.xla}-{self.test}" \ No newline at end of file diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index a965d65d124..cfaca6c4756 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -1,6 +1,16 @@ import logging +import re +import torch +import torch.nn as nn +from torch._dynamo.testing import collect_results, reduce_to_scalar_loss +from torch._dynamo.utils import clone_inputs import types +try: + from .util import move_to_device +except ImportError: + from util import move_to_device + logger = logging.getLogger(__name__) @@ -21,52 +31,112 @@ def list_model_configs(self): def is_compatible(self, model_config, experiment_config): return True - def load_model(self, model_config, benchmark_experiment): - if model_config["model_name"] != "dummy": - raise NotImplementedError - - device = benchmark_experiment.get_device() + def get_benchmark_indices(self, length): + start = self._args.partition_id * (length // self._args.total_partitions) + end = ( + (self._args.partition_id + 1) * (length // self._args.total_partitions) + if self._args.partition_id < self._args.total_partitions - 1 + else length + ) + return start, end - benchmark_model = BenchmarkModel(module=None, example_input=None, optimizer=None, device=device, benchmark_experiment=benchmark_experiment) + def skip_model(self, model_name): + return (not re.search("|".join(self._args.filter), model_name, re.I) or + re.search("|".join(self._args.exclude), model_name, re.I)) - def train(self): - print(self.benchmark_experiment.accelerator, self.benchmark_experiment.xla, self.benchmark_experiment.test) - print(self.device) - if self.benchmark_experiment.xla: - import torch_xla.core.xla_model as xm - print(xm.xla_real_devices([self.device])) - return None - - def eval(self): - print(self.benchmark_experiment.accelerator, self.benchmark_experiment.xla, self.benchmark_experiment.test) - print(self.device) - if self.benchmark_experiment.xla: - import torch_xla.core.xla_model as xm - print(xm.xla_real_devices([self.device])) - return None - - benchmark_model.train = types.MethodType(train, benchmark_model) - benchmark_model.eval = types.MethodType(eval, benchmark_model) + def load_model(self, model_config, benchmark_experiment): + suite_name = self.suite_name + model_name = model_config["model_name"] + optimizer_name = 
model_config.get("optimizer_name", "Adam") + batch_size = self._args.batch_size + benchmark_model = BenchmarkModel( + suite_name=suite_name, + model_name=model_name, + optimizer_name=optimizer_name, + batch_size=batch_size, + benchmark_experiment=benchmark_experiment, + ) + + benchmark_model.set_up() + benchmark_model.prepare_for_experiment() return benchmark_model -class TorchBenchModelLoader(ModelLoader): - - def __init__(self, args): - super(TorchBenchModelLoader, self).__init__(args) - class BenchmarkModel: - def __init__(self, module, example_input, optimizer, device, benchmark_experiment): - self.module = module - self.example_input = example_input - self.optimizer = optimizer - self.device = device + def __init__(self, suite_name, model_name, optimizer_name, batch_size, + benchmark_experiment): + self.suite_name = suite_name + self.model_name = model_name + self.optimizer_name = optimizer_name + self.batch_size = batch_size self.benchmark_experiment = benchmark_experiment - def train(self): - return + def set_up(self): + """Set up module, actual batch_size, example_inputs, and optimizer_class + + This is model suite specific. + """ + if self.model_name != "dummy": + raise NotImplementedError + + self.module = nn.Sequential( + nn.Linear(3, 5), + nn.ReLU(), + nn.Linear(5, 5), + nn.ReLU(), + nn.Linear(5, 3), + nn.Softmax(dim=1), + ) + + self.batch_size = 10 + self.example_inputs = (torch.rand(self.batch_size, 3),) + self.optimizer_class = torch.optim.Adam + + def prepare_for_experiment(self): + self.device = self.benchmark_experiment.get_device() + self.module = self.module.to(self.device) + self.example_inputs = move_to_device(self.example_inputs, self.device) + + if self.benchmark_experiment.test == "eval": + self.module.eval() + self.model_iter_fn = self.eval + self.optimizer = None + elif self.benchmark_experiment.test == "train": + self.module.train() + self.model_iter_fn = self.train + self.optimizer = self.optimizer_class(self.module.parameters(), lr=0.01) + else: + raise NotImplementedError - def eval(self): - return + def optimizer_zero_grad(self): + if self.optimizer is not None: + self.optimizer.zero_grad(True) + else: + self.module.zero_grad(True) + + def optimizer_step(self): + if self.optimizer is not None: + self.optimizer.step() + + def compute_loss(self, pred): + return reduce_to_scalar_loss(pred) + + def train(self, collect_outputs=True): + cloned_inputs = clone_inputs(self.example_inputs) + self.optimizer_zero_grad() + pred = self.module(*cloned_inputs) + loss = self.compute_loss(pred) + loss.backward() + self.optimizer_step() + if collect_outputs: + return collect_results(self.module, pred, loss, cloned_inputs) + return None + + def eval(self, collect_outputs=True): + return self.module(*self.example_inputs) + + @property + def filename_str(self): + return f"{self.suite_name}-{self.model_name}-{self.optimizer_name}-{self.batch_size}" \ No newline at end of file diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index ac78b58860a..4363bec50ec 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -7,17 +7,18 @@ import sys import time import torch -import torch_xla.core.xla_model as xm import types try: - from .benchmark_model import TorchBenchModelLoader, ModelLoader + from .benchmark_model import ModelLoader + from .torchbench_model import TorchBenchModelLoader from .benchmark_experiment import ExperimentLoader - from .utils import patch_torch_manual_seed, reset_rng_state, move_to_device + from .util import 
patch_torch_manual_seed, reset_rng_state, move_to_device except ImportError: - from benchmark_model import TorchBenchModelLoader, ModelLoader + from benchmark_model import ModelLoader + from torchbench_model import TorchBenchModelLoader from benchmark_experiment import ExperimentLoader - from utils import patch_torch_manual_seed, reset_rng_state, move_to_device + from util import patch_torch_manual_seed, reset_rng_state, move_to_device logger = logging.getLogger(__name__) @@ -37,11 +38,14 @@ def __init__(self, args): else: raise NotImplementedError - # initialize output directory from args + # TODO: initialize output directory from args # self.output_dir def run(self): if self._args.experiment_config and self._args.model_config: + if self._args.dry_run: + logger.info(f"Dry run with {[sys.executable] + sys.argv}") + return experiment_config = json.loads(self._args.experiment_config) model_config = json.loads(self._args.model_config) self.run_single_experiment(experiment_config, model_config) @@ -56,11 +60,16 @@ def run(self): process_env = experiment_config.pop("process_env") experiment_config_str = json.dumps(experiment_config) model_config_str = json.dumps(model_config) + experiment_config["process_env"] = process_env + command = ([sys.executable] + sys.argv + + [f"--experiment-config={experiment_config_str}"] + + [f"--model-config={model_config_str}"]) + if self._args.dry_run: + logger.info(f"Dry run with {command}") + continue try: subprocess.check_call( - [sys.executable] + sys.argv + - [f"--experiment-config={experiment_config_str}"] + - [f"--model-config={model_config_str}"], + command, timeout=60 * 20, env=process_env, ) @@ -80,13 +89,6 @@ def run_single_experiment(self, experiment_config, model_config): model_config, benchmark_experiment ) - if benchmark_experiment.test == "train": - benchmark_model.model_iter_fn = benchmark_model.train - elif benchmark_experiment.test == "eval": - benchmark_model.model_iter_fn = benchmark_model.eval - else: - raise NotImplementedError - timings = OrderedDict() results = [] for i in range(self._args.repeat): @@ -100,15 +102,19 @@ def run_single_experiment(self, experiment_config, model_config): timings[key] = np.zeros(self._args.repeat, np.float64) timings[key][i] = val - # save the config, timings and results to proper files in self.output_dir + # TODO: save the config, timings and results to proper files in self.output_dir + logger.info(f"{benchmark_model.filename_str}-{benchmark_experiment.filename_str}") + print(timings) # self.save_results(timings, results) def _mark_step(self, benchmark_experiment): if benchmark_experiment.xla: + import torch_xla.core.xla_model as xm xm.mark_step() def _synchronize(self, benchmark_experiment): if benchmark_experiment.xla: + import torch_xla.core.xla_model as xm xm.wait_device_ops() elif benchmark_experiment.accelerator == "gpu": torch.cuda.synchronize() @@ -125,7 +131,7 @@ def timed_iteration(self, benchmark_experiment, benchmark_model): t_start = time.perf_counter() for i in range(self._args.repeat_inner): - result = benchmark_model.model_iter_fn() + result = benchmark_model.model_iter_fn(collect_outputs=False) if benchmark_experiment.xla and self._args.repeat_inner == 1: t_trace = time.perf_counter() @@ -154,6 +160,13 @@ def parse_args(args=None): help="Suite name for the model garden.", ) + parser.add_argument( + "--filter", "-k", action="append", help="filter benchmarks with regexp" + ) + parser.add_argument( + "--exclude", "-x", action="append", help="filter benchmarks with regexp" + ) + 
parser.add_argument( "--repeat", type=int, @@ -168,6 +181,32 @@ def parse_args(args=None): help="Number of times to repeat the model function inside the timed iteration.", ) + parser.add_argument( + "--batch_size", + type=int, + help="Batch size to be used. If not provided, it depends on the model suites to determine it.", + ) + + parser.add_argument( + "--total-partitions", + type=int, + default=1, + choices=range(1, 10), + help="Total number of partitions we want to divide the benchmark suite into", + ) + parser.add_argument( + "--partition-id", + type=int, + default=0, + help="ID of the benchmark suite partition to be run. Used to divide CI tasks", + ) + + parser.add_argument( + "--dry-run", + action="store_true", + help="Do a dry run to only print the benchmark commands.", + ) + parser.add_argument( "--experiment-config", type=str, @@ -185,10 +224,15 @@ def parse_args(args=None): def main(): args = parse_args() - print(args) + + args.filter = args.filter or [r"."] + args.exclude = args.exclude or [r"^$"] + + logger.info(args) runner = ExperimentRunner(args) runner.run() if __name__ == "__main__": - main() \ No newline at end of file + logging.basicConfig(level=logging.INFO, force=True) + main() \ No newline at end of file diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py new file mode 100644 index 00000000000..27abd1d19ab --- /dev/null +++ b/benchmarks/torchbench_model.py @@ -0,0 +1,107 @@ +import logging +import os +from os.path import abspath, exists +import sys +import torch +import torch.nn as nn +from torch._dynamo.testing import collect_results, reduce_to_scalar_loss +from torch._dynamo.utils import clone_inputs +import types + +try: + from .util import move_to_device, set_cwd + from .benchmark_model import ModelLoader, BenchmarkModel +except ImportError: + from util import move_to_device, set_cwd + from benchmark_model import ModelLoader, BenchmarkModel + + +logger = logging.getLogger(__name__) + + +class TorchBenchModelLoader(ModelLoader): + + def __init__(self, args): + super().__init__(args) + + self.torchbench_dir = self.add_torchbench_dir() + + def add_torchbench_dir(): + os.environ["KALDI_ROOT"] = "/tmp" # avoids some spam + for torchbench_dir in ( + "./torchbenchmark", + "./torchbench", + "./benchmark", + "../torchbenchmark", + "../torchbench", + "../benchmark", + "../../torchbenchmark", + "../../torchbench", + "../../benchmark", + ): + if exists(torchbench_dir): + break + + if exists(torchbench_dir): + torchbench_dir = abspath(torchbench_dir) + if torchbench_dir not in sys.path: + sys.path.append(torchbench_dir) + else: + raise Exception("Torch Benchmark folder not found.") + + return torchbench_dir + + def list_model_configs(self): + model_configs = [] + + from torchbenchmark import _list_model_paths + + models = _list_model_paths() + + start, end = self.get_benchmark_indices(len(models)) + models = models[start: end] + for model_path in models: + model_name = os.path.basename(model_path) + if self.skip_model(model_name): + continue + + model_configs.append({"model_name": model_name}) + + return model_configs + + def is_compatible(self, model_config, experiment_config): + return True + + def load_model(self, model_config, benchmark_experiment): + suite_name = self.suite_name + model_name = model_config["model_name"] + optimizer_name = model_config.get("optimizer_name", "Adam") + batch_size = self._args.batch_size + benchmark_model = TorchBenchModel( + suite_name=suite_name, + model_name=model_name, + optimizer_name=optimizer_name, + 
batch_size=batch_size, + benchmark_experiment=benchmark_experiment, + ) + + benchmark_model.set_up() + benchmark_model.prepare_for_experiment() + + return benchmark_model + + +class TorchBenchModel(BenchmarkModel): + + def __init__(self, suite_name, model_name, optimizer_name, batch_size, + benchmark_experiment): + super().__init__(suite_name, model_name, optimizer_name, batch_size, + benchmark_experiment) + + def set_up(self): + """Set up module, actual batch_size, example_inputs, and optimizer_class + + This is model suite specific. + """ + + diff --git a/benchmarks/utils.py b/benchmarks/util.py similarity index 88% rename from benchmarks/utils.py rename to benchmarks/util.py index d8abc544fdb..8afa5ba5d24 100644 --- a/benchmarks/utils.py +++ b/benchmarks/util.py @@ -1,6 +1,9 @@ +from contextlib import contextmanager import functools from multiprocessing import Process, Queue import numpy as np +import os +from os.path import abspath import queue import random import torch @@ -67,3 +70,13 @@ def move_to_device(item, device): return dict((k, move_to_device(t, device)) for k, t in item.items()) else: return item + + +@contextmanager +def set_cwd(path): + original_dir = abspath(os.getcwd()) + os.chdir(path) + try: + yield + finally: + os.chdir(original_dir) \ No newline at end of file From ca20c6f855fff37e8ba8b8ce947a6accfbc0d21a Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Thu, 12 Jan 2023 20:40:30 +0000 Subject: [PATCH 04/31] add randomize_input --- benchmarks/benchmark_experiment.py | 9 ++++++--- benchmarks/benchmark_model.py | 30 ++++++++++++------------------ benchmarks/experiment_runner.py | 25 ++++++++++++++++++++++--- benchmarks/torchbench_model.py | 10 ++++------ benchmarks/util.py | 30 ++++++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 30 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 2b074a917fd..e3d3899d5c5 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -32,9 +32,12 @@ def list_experiment_configs(self): for xla in xla_options: for test in tests: experiment_config = {"accelerator": accelerator, "xla": xla, "test": test} - if self._is_valid(experiment_config): - self._add_experiment_env(experiment_config, accelerators) - experiment_configs.append(experiment_config) + + if not self._is_valid(experiment_config): + continue + + self._add_experiment_env(experiment_config, accelerators) + experiment_configs.append(experiment_config) return experiment_configs def _is_valid(self, experiment_config): diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index cfaca6c4756..a8019f4054e 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -3,7 +3,6 @@ import torch import torch.nn as nn from torch._dynamo.testing import collect_results, reduce_to_scalar_loss -from torch._dynamo.utils import clone_inputs import types try: @@ -47,12 +46,10 @@ def skip_model(self, model_name): def load_model(self, model_config, benchmark_experiment): suite_name = self.suite_name model_name = model_config["model_name"] - optimizer_name = model_config.get("optimizer_name", "Adam") batch_size = self._args.batch_size benchmark_model = BenchmarkModel( suite_name=suite_name, model_name=model_name, - optimizer_name=optimizer_name, batch_size=batch_size, benchmark_experiment=benchmark_experiment, ) @@ -65,11 +62,9 @@ def load_model(self, model_config, benchmark_experiment): class BenchmarkModel: - def __init__(self, suite_name, model_name, 
optimizer_name, batch_size, - benchmark_experiment): + def __init__(self, suite_name, model_name, batch_size, benchmark_experiment): self.suite_name = suite_name self.model_name = model_name - self.optimizer_name = optimizer_name self.batch_size = batch_size self.benchmark_experiment = benchmark_experiment @@ -82,16 +77,16 @@ def set_up(self): raise NotImplementedError self.module = nn.Sequential( - nn.Linear(3, 5), + nn.Linear(32, 512), nn.ReLU(), - nn.Linear(5, 5), + nn.Linear(512, 512), nn.ReLU(), - nn.Linear(5, 3), + nn.Linear(512, 32), nn.Softmax(dim=1), ) - self.batch_size = 10 - self.example_inputs = (torch.rand(self.batch_size, 3),) + self.batch_size = 16 + self.example_inputs = (torch.rand(self.batch_size, 32),) self.optimizer_class = torch.optim.Adam def prepare_for_experiment(self): @@ -123,20 +118,19 @@ def optimizer_step(self): def compute_loss(self, pred): return reduce_to_scalar_loss(pred) - def train(self, collect_outputs=True): - cloned_inputs = clone_inputs(self.example_inputs) + def train(self, inputs, collect_outputs=True): self.optimizer_zero_grad() - pred = self.module(*cloned_inputs) + pred = self.module(*inputs) loss = self.compute_loss(pred) loss.backward() self.optimizer_step() if collect_outputs: - return collect_results(self.module, pred, loss, cloned_inputs) + return collect_results(self.module, pred, loss, inputs) return None - def eval(self, collect_outputs=True): - return self.module(*self.example_inputs) + def eval(self, inputs, collect_outputs=True): + return self.module(*inputs) @property def filename_str(self): - return f"{self.suite_name}-{self.model_name}-{self.optimizer_name}-{self.batch_size}" \ No newline at end of file + return f"{self.suite_name}-{self.model_name}-{self.batch_size}" \ No newline at end of file diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 4363bec50ec..6f03aa09dd4 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -1,5 +1,6 @@ import argparse from collections import OrderedDict +import copy import json import logging import numpy as np @@ -13,12 +14,12 @@ from .benchmark_model import ModelLoader from .torchbench_model import TorchBenchModelLoader from .benchmark_experiment import ExperimentLoader - from .util import patch_torch_manual_seed, reset_rng_state, move_to_device + from .util import patch_torch_manual_seed, reset_rng_state, move_to_device, randomize_input except ImportError: from benchmark_model import ModelLoader from torchbench_model import TorchBenchModelLoader from benchmark_experiment import ExperimentLoader - from util import patch_torch_manual_seed, reset_rng_state, move_to_device + from util import patch_torch_manual_seed, reset_rng_state, move_to_device, randomize_input logger = logging.getLogger(__name__) @@ -82,6 +83,7 @@ def run(self): logger.warning("SKIP because of incompatible configs.") def run_single_experiment(self, experiment_config, model_config): + reset_rng_state() benchmark_experiment = self.experiment_loader.load_experiment( experiment_config ) @@ -121,9 +123,20 @@ def _synchronize(self, benchmark_experiment): else: pass + def prepare_inputs(self, example_inputs, should_randomize_input): + inputs_list = [] + for i in range(self._args.repeat_inner): + inputs = copy.deepcopy(example_inputs) + if should_randomize_input: + inputs = randomize_input(inputs) + inputs_list.append(inputs) + return inputs_list + def timed_iteration(self, benchmark_experiment, benchmark_model): reset_rng_state() + inputs_list = 
self.prepare_inputs(benchmark_model.example_inputs, self._args.randomize_input) + self._mark_step(benchmark_experiment) self._synchronize(benchmark_experiment) @@ -131,7 +144,7 @@ def timed_iteration(self, benchmark_experiment, benchmark_model): t_start = time.perf_counter() for i in range(self._args.repeat_inner): - result = benchmark_model.model_iter_fn(collect_outputs=False) + result = benchmark_model.model_iter_fn(inputs_list[i], collect_outputs=False) if benchmark_experiment.xla and self._args.repeat_inner == 1: t_trace = time.perf_counter() @@ -207,6 +220,12 @@ def parse_args(args=None): help="Do a dry run to only print the benchmark commands.", ) + parser.add_argument( + "--randomize-input", + action="store_true", + help="Whether to randomize the input values. Dimensions will be kept the same.", + ) + parser.add_argument( "--experiment-config", type=str, diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index 27abd1d19ab..bb72120c3d1 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -62,6 +62,7 @@ def list_model_configs(self): models = models[start: end] for model_path in models: model_name = os.path.basename(model_path) + if self.skip_model(model_name): continue @@ -75,12 +76,10 @@ def is_compatible(self, model_config, experiment_config): def load_model(self, model_config, benchmark_experiment): suite_name = self.suite_name model_name = model_config["model_name"] - optimizer_name = model_config.get("optimizer_name", "Adam") batch_size = self._args.batch_size benchmark_model = TorchBenchModel( suite_name=suite_name, model_name=model_name, - optimizer_name=optimizer_name, batch_size=batch_size, benchmark_experiment=benchmark_experiment, ) @@ -93,15 +92,14 @@ def load_model(self, model_config, benchmark_experiment): class TorchBenchModel(BenchmarkModel): - def __init__(self, suite_name, model_name, optimizer_name, batch_size, - benchmark_experiment): - super().__init__(suite_name, model_name, optimizer_name, batch_size, - benchmark_experiment) + def __init__(self, suite_name, model_name, batch_size, benchmark_experiment): + super().__init__(suite_name, model_name, batch_size, benchmark_experiment) def set_up(self): """Set up module, actual batch_size, example_inputs, and optimizer_class This is model suite specific. """ + self.optimizer_class = torch.optim.Adam diff --git a/benchmarks/util.py b/benchmarks/util.py index 8afa5ba5d24..47a3c49d96b 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -1,5 +1,6 @@ from contextlib import contextmanager import functools +import logging from multiprocessing import Process, Queue import numpy as np import os @@ -9,6 +10,8 @@ import torch import traceback +logger = logging.getLogger(__name__) + @functools.lru_cache(None) def patch_torch_manual_seed(): @@ -72,6 +75,33 @@ def move_to_device(item, device): return item +def randomize_input(inputs): + if isinstance(inputs, torch.Tensor): + if inputs.dtype in (torch.float32, torch.float64): + torch._dynamo.utils.counters["randomize_input"]["times"] += 1 + return torch.randn_like(inputs) + elif inputs.dtype == torch.int64: + # Note: we can not simply tune integer tensors as follows + # `return torch.randint_like(inputs, high=inputs.max().item())` + # This may break some invariants between tensors. + # E.g. in embedding lookup case, one tensor is the length + # and another is an indices tensor. 
+ return inputs + else: + raise RuntimeError( + f"randomize_input need support tensor of type {inputs.dtype}" + ) + elif isinstance(inputs, (list, tuple)): + return type(inputs)([randomize_input(x) for x in inputs]) + elif isinstance(inputs, dict): + return dict((k, randomize_input(x)) for k, x in inputs.items()) + else: + logger.warning( + f"randomize_input can not handle input of type {type(inputs)}" + ) + return inputs + + @contextmanager def set_cwd(path): original_dir = abspath(os.getcwd()) From 4ae79e25bf6d084bf77e5d6cc2bbd7e5c93604e3 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Fri, 13 Jan 2023 05:37:49 +0000 Subject: [PATCH 05/31] add model set up for torchbench model --- benchmarks/benchmark_model.py | 2 +- benchmarks/experiment_runner.py | 23 +++++++++--------- benchmarks/torchbench_model.py | 43 ++++++++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index a8019f4054e..0ffb2751f4f 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -133,4 +133,4 @@ def eval(self, inputs, collect_outputs=True): @property def filename_str(self): - return f"{self.suite_name}-{self.model_name}-{self.batch_size}" \ No newline at end of file + return f"{self.suite_name}-{self.model_name}-{self.batch_size}" diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 6f03aa09dd4..ca22997f3d2 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -94,7 +94,7 @@ def run_single_experiment(self, experiment_config, model_config): timings = OrderedDict() results = [] for i in range(self._args.repeat): - timing, result = self.timed_iteration( + timing, result = self.timed_run( benchmark_experiment, benchmark_model ) result = move_to_device(result, 'cpu') @@ -125,28 +125,29 @@ def _synchronize(self, benchmark_experiment): def prepare_inputs(self, example_inputs, should_randomize_input): inputs_list = [] - for i in range(self._args.repeat_inner): + for i in range(self._args.iterations_per_run): inputs = copy.deepcopy(example_inputs) if should_randomize_input: inputs = randomize_input(inputs) inputs_list.append(inputs) return inputs_list - def timed_iteration(self, benchmark_experiment, benchmark_model): + def timed_run(self, benchmark_experiment, benchmark_model): reset_rng_state() inputs_list = self.prepare_inputs(benchmark_model.example_inputs, self._args.randomize_input) + reset_rng_state() self._mark_step(benchmark_experiment) self._synchronize(benchmark_experiment) timing = OrderedDict() t_start = time.perf_counter() - for i in range(self._args.repeat_inner): + for i in range(self._args.iterations_per_run): result = benchmark_model.model_iter_fn(inputs_list[i], collect_outputs=False) - if benchmark_experiment.xla and self._args.repeat_inner == 1: + if benchmark_experiment.xla and self._args.iterations_per_run == 1: t_trace = time.perf_counter() self._mark_step(benchmark_experiment) @@ -156,8 +157,8 @@ def timed_iteration(self, benchmark_experiment, benchmark_model): t_end = time.perf_counter() timing["total"] = t_end - t_start - timing["average"] = timing["total"] / self._args.repeat_inner - if benchmark_experiment.xla and self._args.repeat_inner == 1: + timing["average"] = timing["total"] / self._args.iterations_per_run + if benchmark_experiment.xla and self._args.iterations_per_run == 1: timing["trace"] = t_trace - t_start return timing, result @@ -184,18 +185,18 @@ def parse_args(args=None): "--repeat", type=int, 
default=10, - help="Number of times to repeat the timed iteration.", + help="Number of times to repeat the timed run in a single experiment.", ) parser.add_argument( - "--repeat-inner", + "--iterations-per-run", type=int, default=1, - help="Number of times to repeat the model function inside the timed iteration.", + help="Number of times to repeat the model iteration inside a timed run.", ) parser.add_argument( - "--batch_size", + "--batch-size", type=int, help="Batch size to be used. If not provided, it depends on the model suites to determine it.", ) diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index bb72120c3d1..d824982ee08 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -1,3 +1,5 @@ +import gc +import importlib import logging import os from os.path import abspath, exists @@ -102,4 +104,43 @@ def set_up(self): """ self.optimizer_class = torch.optim.Adam - + try: + module = importlib.import_module(f"torchbenchmark.models.{self.model_name}") + except ModuleNotFoundError: + module = importlib.import_module(f"torchbenchmark.models.fb.{self.model_name}") + benchmark_cls = getattr(module, "Model", None) + + cant_change_batch_size = ( + not getattr(benchmark_cls, "ALLOW_CUSTOMIZE_BSIZE", True) + ) + if cant_change_batch_size: + self.batch_size = None + + # workaround "RuntimeError: not allowed to set torch.backends.cudnn flags" + # torch.backends.__allow_nonbracketed_mutation_flag = True + + if self.benchmark_experiment.accelerator == "cpu": + device = "cpu" + elif self.benchmark_experiment.accelerator == "gpu": + device = "cuda" + else: + device = str(self.benchmark_experiment.get_device()) + + benchmark = benchmark_cls( + test=self.benchmark_experiment.test, + device=device, + jit=False, + batch_size=self.batch_size, + ) + + self.module, self.example_inputs = benchmark.get_module() + + self.batch_size = benchmark.batch_size + + # Torchbench has quite different setup for yolov3, so directly passing + # the right example_inputs + if self.model_name == "yolov3": + self.example_inputs = (torch.rand(self.batch_size, 3, 384, 512),) + + del benchmark + gc.collect() From 35d446fa7f7e1c102513d9d61d180e476d9c4771 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Fri, 13 Jan 2023 23:13:44 +0000 Subject: [PATCH 06/31] update ExperimentLoader --- benchmarks/benchmark_experiment.py | 38 ++++++++++++++++-------------- benchmarks/torchbench_model.py | 2 +- benchmarks/util.py | 20 +++++++++++----- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index e3d3899d5c5..80e777d817c 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -3,9 +3,9 @@ import torch try: - from .util import is_tpu_available + from .util import is_xla_device_available except ImportError: - from util import is_tpu_available + from util import is_xla_device_available logger = logging.getLogger(__name__) @@ -18,14 +18,7 @@ def __init__(self, args): def list_experiment_configs(self): experiment_configs = [] - accelerators = ["cpu"] - - if torch.cuda.is_available(): - accelerators.append("gpu") - - if is_tpu_available(): - accelerators.append("tpu") - + accelerators = ["cpu", "gpu", "tpu"] xla_options = [None, "PJRT", "XRT"] tests = ["eval", "train"] for accelerator in accelerators: @@ -33,31 +26,40 @@ def list_experiment_configs(self): for test in tests: experiment_config = {"accelerator": accelerator, "xla": xla, "test": test} - if not 
self._is_valid(experiment_config): + if not self.is_available(experiment_config): continue - self._add_experiment_env(experiment_config, accelerators) + self._add_experiment_env(experiment_config) experiment_configs.append(experiment_config) return experiment_configs - def _is_valid(self, experiment_config): - if experiment_config["accelerator"] == "tpu" and not experiment_config["xla"]: + def is_available(self, experiment_config): + if (experiment_config["xla"] and + not is_xla_device_available(experiment_config["accelerator"].upper())): + return False + if (experiment_config["accelerator"] == "tpu" and + not experiment_config["xla"]): + return False + if (experiment_config["accelerator"] == "gpu" and + experiment_config["xla"] == "PJRT"): return False - if experiment_config["accelerator"] == "gpu" and experiment_config["xla"] == "PJRT": + if (experiment_config["accelerator"] == "gpu" and + not experiment_config["xla"] and + not torch.cuda.is_available()): return False return True - def _add_experiment_env(self, experiment_config, accelerators): + def _add_experiment_env(self, experiment_config): process_env = None if experiment_config["xla"] == "PJRT": process_env = os.environ.copy() process_env["PJRT_DEVICE"] = experiment_config["accelerator"].upper() elif experiment_config["xla"] == "XRT": process_env = os.environ.copy() - if "tpu" in accelerators: + if is_xla_device_available("TPU"): process_env["TPU_NUM_DEVICES"] = "1" process_env["XRT_TPU_CONFIG"] = "localservice;0;localhost:51011" - elif "gpu" in accelerators: + elif is_xla_device_available("GPU"): process_env["GPU_NUM_DEVICES"] = "1" experiment_config["process_env"] = process_env diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index d824982ee08..7c8953a3f64 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -121,7 +121,7 @@ def set_up(self): if self.benchmark_experiment.accelerator == "cpu": device = "cpu" - elif self.benchmark_experiment.accelerator == "gpu": + elif self.benchmark_experiment.accelerator == "gpu" and not self.benchmark_experiment.xla: device = "cuda" else: device = str(self.benchmark_experiment.get_device()) diff --git a/benchmarks/util.py b/benchmarks/util.py index 47a3c49d96b..6fcdfb6e641 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -36,23 +36,31 @@ def reset_rng_state(): np.random.seed(1337) -@functools.lru_cache(maxsize=1) -def is_tpu_available(): +@functools.lru_cache(maxsize=3) +def is_xla_device_available(devkind): + if devkind not in ["CPU", "GPU", "TPU"]: + raise ValueError, devkind - def _check_tpu(q): + def _check_xla_device(q, devkind): try: import os - os.environ["PJRT_DEVICE"] = "TPU" + if devkind == "TPU": + os.environ["PJRT_DEVICE"] = "TPU" + elif devkind == "GPU": + os.environ["GPU_NUM_DEVICES"] = "1" import torch_xla.core.xla_model as xm - q.put((xm.xrt_world_size() > 1) or bool(xm.get_xla_supported_devices("TPU"))) + if devkind == "CPU": + q.put(True) + else: + q.put(bool(xm.get_xla_supported_devices(devkind))) except Exception: traceback.print_exc() q.put(None) q = Queue() - process = Process(target=_check_tpu, args=(q,)) + process = Process(target=_check_xla_device, args=(q, devkind)) process.start() process.join(60) try: From 4bb173da0b9c8c5a99c14dc3116637ccb169e945 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Wed, 18 Jan 2023 21:35:04 +0000 Subject: [PATCH 07/31] Add saving results --- benchmarks/benchmark_experiment.py | 11 ++- benchmarks/benchmark_model.py | 11 ++- benchmarks/experiment_runner.py | 107 
+++++++++++++++++++++++++---- benchmarks/torchbench_model.py | 16 ++--- benchmarks/util.py | 20 +++--- 5 files changed, 119 insertions(+), 46 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 80e777d817c..09098fb279c 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -40,9 +40,6 @@ def is_available(self, experiment_config): if (experiment_config["accelerator"] == "tpu" and not experiment_config["xla"]): return False - if (experiment_config["accelerator"] == "gpu" and - experiment_config["xla"] == "PJRT"): - return False if (experiment_config["accelerator"] == "gpu" and not experiment_config["xla"] and not torch.cuda.is_available()): @@ -68,17 +65,19 @@ def load_experiment(self, experiment_config): accelerator = experiment_config.get("accelerator", "cpu") xla = experiment_config.get("xla", None) test = experiment_config.get("test", "eval") - benchmark_experiment = BenchmarkExperiment(accelerator=accelerator, xla=xla, test=test) + batch_size = experiment_config.get("batch_size", self._args.batch_size) + benchmark_experiment = BenchmarkExperiment(accelerator=accelerator, xla=xla, test=test, batch_size=batch_size) return benchmark_experiment class BenchmarkExperiment: - def __init__(self, accelerator, xla, test): + def __init__(self, accelerator, xla, test, batch_size): self.accelerator = accelerator self.xla = xla self.test = test + self.batch_size = batch_size def get_device(self): if self.xla: @@ -95,4 +94,4 @@ def get_device(self): @property def filename_str(self): - return f"{self.accelerator}-{self.xla}-{self.test}" \ No newline at end of file + return f"{self.accelerator}-{self.xla}-{self.test}-{self.batch_size}" \ No newline at end of file diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 0ffb2751f4f..2275aaf1528 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -46,11 +46,9 @@ def skip_model(self, model_name): def load_model(self, model_config, benchmark_experiment): suite_name = self.suite_name model_name = model_config["model_name"] - batch_size = self._args.batch_size benchmark_model = BenchmarkModel( suite_name=suite_name, model_name=model_name, - batch_size=batch_size, benchmark_experiment=benchmark_experiment, ) @@ -62,10 +60,9 @@ def load_model(self, model_config, benchmark_experiment): class BenchmarkModel: - def __init__(self, suite_name, model_name, batch_size, benchmark_experiment): + def __init__(self, suite_name, model_name, benchmark_experiment): self.suite_name = suite_name self.model_name = model_name - self.batch_size = batch_size self.benchmark_experiment = benchmark_experiment def set_up(self): @@ -85,8 +82,8 @@ def set_up(self): nn.Softmax(dim=1), ) - self.batch_size = 16 - self.example_inputs = (torch.rand(self.batch_size, 32),) + self.benchmark_experiment.batch_size = 16 + self.example_inputs = (torch.rand(self.benchmark_experiment.batch_size, 32),) self.optimizer_class = torch.optim.Adam def prepare_for_experiment(self): @@ -133,4 +130,4 @@ def eval(self, inputs, collect_outputs=True): @property def filename_str(self): - return f"{self.suite_name}-{self.model_name}-{self.batch_size}" + return f"{self.suite_name}-{self.model_name}" diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index ca22997f3d2..f68db4068ed 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -1,9 +1,12 @@ import argparse from collections import OrderedDict import copy +import 
csv +import io import json import logging import numpy as np +import os import subprocess import sys import time @@ -39,8 +42,9 @@ def __init__(self, args): else: raise NotImplementedError - # TODO: initialize output directory from args - # self.output_dir + self.output_dir = os.path.abspath(self._args.output_dirname) + os.makedirs(self.output_dir, exist_ok=True) + self.output_file = os.path.join(self.output_dir, self._args.output_basename) def run(self): if self._args.experiment_config and self._args.model_config: @@ -52,6 +56,8 @@ def run(self): self.run_single_experiment(experiment_config, model_config) else: assert not self._args.experiment_config and not self._args.model_config + if os.path.exists(self.output_file): + os.unlink(self.output_file) experiment_configs = self.experiment_loader.list_experiment_configs() model_configs = self.model_loader.list_model_configs() @@ -74,19 +80,21 @@ def run(self): timeout=60 * 20, env=process_env, ) - except subprocess.TimeoutExpired: + except subprocess.TimeoutExpired as e: logger.error("TIMEOUT") - except subprocess.SubprocessError: + self.record_failed_experiment(model_config_str, experiment_config_str, e) + except subprocess.SubprocessError as e: logger.error("ERROR") + self.record_failed_experiment(model_config_str, experiment_config_str, e) else: logger.warning("SKIP because of incompatible configs.") def run_single_experiment(self, experiment_config, model_config): - reset_rng_state() benchmark_experiment = self.experiment_loader.load_experiment( experiment_config ) + reset_rng_state(benchmark_experiment) benchmark_model = self.model_loader.load_model( model_config, benchmark_experiment ) @@ -103,11 +111,62 @@ def run_single_experiment(self, experiment_config, model_config): if i == 0: timings[key] = np.zeros(self._args.repeat, np.float64) timings[key][i] = val - - # TODO: save the config, timings and results to proper files in self.output_dir - logger.info(f"{benchmark_model.filename_str}-{benchmark_experiment.filename_str}") - print(timings) - # self.save_results(timings, results) + # print(timings) + + self.save_results(benchmark_experiment, benchmark_model, timings, results) + + def save_results(self, benchmark_experiment, benchmark_model, timings, results): + detail_file_name = f"{benchmark_model.filename_str}-{benchmark_experiment.filename_str}.pt" + csv_headers = [ + "suite_name", + "model_name", + "accelerator", + "xla", + "test", + "batch_size", + "median_total", + "median_average", + "detail_results", + ] + + csv_row = [ + benchmark_model.suite_name, + benchmark_model.model_name, + benchmark_experiment.accelerator, + benchmark_experiment.xla, + benchmark_experiment.test, + benchmark_experiment.batch_size, + np.median(timings["total"]).item(), + np.median(timings["average"]).item(), + detail_file_name, + ] + + self.output_csv(csv_headers, csv_row) + + torch.save({"timings": timings, "results": results}, + os.path.join(self.output_dir, detail_file_name)) + + def record_failed_experiment(self, model_config_str, experiment_config_str, e): + headers = ["model_config", "experiment_config", "failure"] + row = [model_config_str, experiment_config_str, e] + file_path = os.path.join(self.output_dir, "failed_experiments.csv") + self.output_csv(headers, row, file_path) + + def output_csv(self, headers, row, file_path=None): + if not file_path: + file_path = self.output_file + existed = os.path.exists(file_path) + output = csv.writer( + io.TextIOWrapper( + open(file_path, "ab", buffering=0), + "utf-8", + write_through=True, + ), + 
lineterminator="\n", + ) + if not existed: + output.writerow(headers) + output.writerow([(f"{x:.8e}" if isinstance(x, float) else x) for x in row]) def _mark_step(self, benchmark_experiment): if benchmark_experiment.xla: @@ -133,11 +192,11 @@ def prepare_inputs(self, example_inputs, should_randomize_input): return inputs_list def timed_run(self, benchmark_experiment, benchmark_model): - reset_rng_state() + reset_rng_state(benchmark_experiment) inputs_list = self.prepare_inputs(benchmark_model.example_inputs, self._args.randomize_input) - reset_rng_state() + reset_rng_state(benchmark_experiment) self._mark_step(benchmark_experiment) self._synchronize(benchmark_experiment) @@ -145,7 +204,7 @@ def timed_run(self, benchmark_experiment, benchmark_model): t_start = time.perf_counter() for i in range(self._args.iterations_per_run): - result = benchmark_model.model_iter_fn(inputs_list[i], collect_outputs=False) + result = benchmark_model.model_iter_fn(inputs_list[i], collect_outputs=self._args.collect_outputs) if benchmark_experiment.xla and self._args.iterations_per_run == 1: t_trace = time.perf_counter() @@ -227,6 +286,28 @@ def parse_args(args=None): help="Whether to randomize the input values. Dimensions will be kept the same.", ) + parser.add_argument( + "--collect-outputs", + action="store_true", + help="""Whether to collect outputs for training. Set this to true if we + want to verify the numerical correctness of graidents. But that may + cause time measurement not accurate""", + ) + + parser.add_argument( + "--output-dirname", + type=str, + default="./output/", + help="Overrides the directory to place output files.", + ) + + parser.add_argument( + "--output-basename", + type=str, + default="results.csv", + help="Overrides the basename of output files.", + ) + parser.add_argument( "--experiment-config", type=str, diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index 7c8953a3f64..ead1ca31f6c 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -28,7 +28,7 @@ def __init__(self, args): self.torchbench_dir = self.add_torchbench_dir() - def add_torchbench_dir(): + def add_torchbench_dir(self): os.environ["KALDI_ROOT"] = "/tmp" # avoids some spam for torchbench_dir in ( "./torchbenchmark", @@ -78,11 +78,9 @@ def is_compatible(self, model_config, experiment_config): def load_model(self, model_config, benchmark_experiment): suite_name = self.suite_name model_name = model_config["model_name"] - batch_size = self._args.batch_size benchmark_model = TorchBenchModel( suite_name=suite_name, model_name=model_name, - batch_size=batch_size, benchmark_experiment=benchmark_experiment, ) @@ -94,8 +92,8 @@ def load_model(self, model_config, benchmark_experiment): class TorchBenchModel(BenchmarkModel): - def __init__(self, suite_name, model_name, batch_size, benchmark_experiment): - super().__init__(suite_name, model_name, batch_size, benchmark_experiment) + def __init__(self, suite_name, model_name, benchmark_experiment): + super().__init__(suite_name, model_name, benchmark_experiment) def set_up(self): """Set up module, actual batch_size, example_inputs, and optimizer_class @@ -114,7 +112,7 @@ def set_up(self): not getattr(benchmark_cls, "ALLOW_CUSTOMIZE_BSIZE", True) ) if cant_change_batch_size: - self.batch_size = None + self.benchmark_experiment.batch_size = None # workaround "RuntimeError: not allowed to set torch.backends.cudnn flags" # torch.backends.__allow_nonbracketed_mutation_flag = True @@ -130,17 +128,17 @@ def set_up(self): 
test=self.benchmark_experiment.test, device=device, jit=False, - batch_size=self.batch_size, + batch_size=self.benchmark_experiment.batch_size, ) self.module, self.example_inputs = benchmark.get_module() - self.batch_size = benchmark.batch_size + self.benchmark_experiment.batch_size = benchmark.batch_size # Torchbench has quite different setup for yolov3, so directly passing # the right example_inputs if self.model_name == "yolov3": - self.example_inputs = (torch.rand(self.batch_size, 3, 384, 512),) + self.example_inputs = (torch.rand(self.benchmark_experiment.batch_size, 3, 384, 512),) del benchmark gc.collect() diff --git a/benchmarks/util.py b/benchmarks/util.py index 6fcdfb6e641..65d1aa592ca 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -30,34 +30,32 @@ def deterministic_torch_manual_seed(*args, **kwargs): torch.manual_seed = deterministic_torch_manual_seed -def reset_rng_state(): +def reset_rng_state(benchmark_experiment=None): torch.manual_seed(1337) random.seed(1337) np.random.seed(1337) + if benchmark_experiment and benchmark_experiment.xla: + import torch_xla.core.xla_model as xm + device = benchmark_experiment.get_device() + xm.set_rng_state(1337, str(device)) @functools.lru_cache(maxsize=3) def is_xla_device_available(devkind): if devkind not in ["CPU", "GPU", "TPU"]: - raise ValueError, devkind + raise ValueError(devkind) def _check_xla_device(q, devkind): try: import os - if devkind == "TPU": - os.environ["PJRT_DEVICE"] = "TPU" - elif devkind == "GPU": - os.environ["GPU_NUM_DEVICES"] = "1" + os.environ["PJRT_DEVICE"] = devkind import torch_xla.core.xla_model as xm - if devkind == "CPU": - q.put(True) - else: - q.put(bool(xm.get_xla_supported_devices(devkind))) + q.put(bool(xm.get_xla_supported_devices(devkind=devkind))) except Exception: traceback.print_exc() - q.put(None) + q.put(False) q = Queue() process = Process(target=_check_xla_device, args=(q, devkind)) From 980cd5b327b8832785e2d4c04dc9d17ac6735610 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Wed, 18 Jan 2023 22:25:32 +0000 Subject: [PATCH 08/31] minor args update --- benchmarks/benchmark_model.py | 11 ++++++----- benchmarks/experiment_runner.py | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 2275aaf1528..2baed04ca37 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -115,18 +115,19 @@ def optimizer_step(self): def compute_loss(self, pred): return reduce_to_scalar_loss(pred) - def train(self, inputs, collect_outputs=True): + def train(self, inputs, collect_full_result=False): self.optimizer_zero_grad() pred = self.module(*inputs) loss = self.compute_loss(pred) loss.backward() self.optimizer_step() - if collect_outputs: + if collect_full_result: return collect_results(self.module, pred, loss, inputs) - return None + return pred - def eval(self, inputs, collect_outputs=True): - return self.module(*inputs) + def eval(self, inputs, collect_full_result=False): + pred = self.module(*inputs) + return pred @property def filename_str(self): diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index f68db4068ed..5e49dae5eab 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -204,7 +204,7 @@ def timed_run(self, benchmark_experiment, benchmark_model): t_start = time.perf_counter() for i in range(self._args.iterations_per_run): - result = benchmark_model.model_iter_fn(inputs_list[i], collect_outputs=self._args.collect_outputs) + 
result = benchmark_model.model_iter_fn(inputs_list[i], collect_full_result=self._args.collect_full_result) if benchmark_experiment.xla and self._args.iterations_per_run == 1: t_trace = time.perf_counter() @@ -287,9 +287,9 @@ def parse_args(args=None): ) parser.add_argument( - "--collect-outputs", + "--collect-full-result", action="store_true", - help="""Whether to collect outputs for training. Set this to true if we + help="""Whether to collect full result for training. Set this to true if we want to verify the numerical correctness of graidents. But that may cause time measurement not accurate""", ) From a4106666fc5f19506698cb694b1816f8ea9c1e54 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Wed, 18 Jan 2023 23:50:42 +0000 Subject: [PATCH 09/31] update style --- benchmarks/benchmark_experiment.py | 14 +- benchmarks/benchmark_model.py | 19 +-- benchmarks/experiment_runner.py | 216 +++++++++++++++-------------- benchmarks/torchbench_model.py | 19 +-- benchmarks/util.py | 8 +- 5 files changed, 141 insertions(+), 135 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 09098fb279c..03fbe8baafc 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -24,7 +24,11 @@ def list_experiment_configs(self): for accelerator in accelerators: for xla in xla_options: for test in tests: - experiment_config = {"accelerator": accelerator, "xla": xla, "test": test} + experiment_config = { + "accelerator": accelerator, + "xla": xla, + "test": test + } if not self.is_available(experiment_config): continue @@ -41,8 +45,7 @@ def is_available(self, experiment_config): not experiment_config["xla"]): return False if (experiment_config["accelerator"] == "gpu" and - not experiment_config["xla"] and - not torch.cuda.is_available()): + not experiment_config["xla"] and not torch.cuda.is_available()): return False return True @@ -66,7 +69,8 @@ def load_experiment(self, experiment_config): xla = experiment_config.get("xla", None) test = experiment_config.get("test", "eval") batch_size = experiment_config.get("batch_size", self._args.batch_size) - benchmark_experiment = BenchmarkExperiment(accelerator=accelerator, xla=xla, test=test, batch_size=batch_size) + benchmark_experiment = BenchmarkExperiment( + accelerator=accelerator, xla=xla, test=test, batch_size=batch_size) return benchmark_experiment @@ -94,4 +98,4 @@ def get_device(self): @property def filename_str(self): - return f"{self.accelerator}-{self.xla}-{self.test}-{self.batch_size}" \ No newline at end of file + return f"{self.accelerator}-{self.xla}-{self.test}-{self.batch_size}" diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 2baed04ca37..c735000d9b0 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -10,7 +10,6 @@ except ImportError: from util import move_to_device - logger = logging.getLogger(__name__) @@ -22,7 +21,9 @@ def __init__(self, args): def list_model_configs(self): model_configs = [ - {"model_name": "dummy"}, + { + "model_name": "dummy" + }, ] return model_configs @@ -32,11 +33,10 @@ def is_compatible(self, model_config, experiment_config): def get_benchmark_indices(self, length): start = self._args.partition_id * (length // self._args.total_partitions) - end = ( - (self._args.partition_id + 1) * (length // self._args.total_partitions) - if self._args.partition_id < self._args.total_partitions - 1 - else length - ) + end = ((self._args.partition_id + 1) * + (length // self._args.total_partitions) + if 
self._args.partition_id < self._args.total_partitions - 1 else + length) return start, end def skip_model(self, model_name): @@ -83,7 +83,8 @@ def set_up(self): ) self.benchmark_experiment.batch_size = 16 - self.example_inputs = (torch.rand(self.benchmark_experiment.batch_size, 32),) + self.example_inputs = (torch.rand(self.benchmark_experiment.batch_size, + 32),) self.optimizer_class = torch.optim.Adam def prepare_for_experiment(self): @@ -122,7 +123,7 @@ def train(self, inputs, collect_full_result=False): loss.backward() self.optimizer_step() if collect_full_result: - return collect_results(self.module, pred, loss, inputs) + return collect_results(self.module, pred, loss, inputs) return pred def eval(self, inputs, collect_full_result=False): diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 5e49dae5eab..822d7d5b13b 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -82,29 +82,27 @@ def run(self): ) except subprocess.TimeoutExpired as e: logger.error("TIMEOUT") - self.record_failed_experiment(model_config_str, experiment_config_str, e) + self.record_failed_experiment(model_config_str, + experiment_config_str, e) except subprocess.SubprocessError as e: logger.error("ERROR") - self.record_failed_experiment(model_config_str, experiment_config_str, e) + self.record_failed_experiment(model_config_str, + experiment_config_str, e) else: logger.warning("SKIP because of incompatible configs.") def run_single_experiment(self, experiment_config, model_config): benchmark_experiment = self.experiment_loader.load_experiment( - experiment_config - ) + experiment_config) reset_rng_state(benchmark_experiment) - benchmark_model = self.model_loader.load_model( - model_config, benchmark_experiment - ) + benchmark_model = self.model_loader.load_model(model_config, + benchmark_experiment) timings = OrderedDict() results = [] for i in range(self._args.repeat): - timing, result = self.timed_run( - benchmark_experiment, benchmark_model - ) + timing, result = self.timed_run(benchmark_experiment, benchmark_model) result = move_to_device(result, 'cpu') results.append(result) for key, val in timing.items(): @@ -115,7 +113,8 @@ def run_single_experiment(self, experiment_config, model_config): self.save_results(benchmark_experiment, benchmark_model, timings, results) - def save_results(self, benchmark_experiment, benchmark_model, timings, results): + def save_results(self, benchmark_experiment, benchmark_model, timings, + results): detail_file_name = f"{benchmark_model.filename_str}-{benchmark_experiment.filename_str}.pt" csv_headers = [ "suite_name", @@ -143,10 +142,13 @@ def save_results(self, benchmark_experiment, benchmark_model, timings, results): self.output_csv(csv_headers, csv_row) - torch.save({"timings": timings, "results": results}, - os.path.join(self.output_dir, detail_file_name)) + torch.save({ + "timings": timings, + "results": results + }, os.path.join(self.output_dir, detail_file_name)) - def record_failed_experiment(self, model_config_str, experiment_config_str, e): + def record_failed_experiment(self, model_config_str, experiment_config_str, + e): headers = ["model_config", "experiment_config", "failure"] row = [model_config_str, experiment_config_str, e] file_path = os.path.join(self.output_dir, "failed_experiments.csv") @@ -194,7 +196,8 @@ def prepare_inputs(self, example_inputs, should_randomize_input): def timed_run(self, benchmark_experiment, benchmark_model): reset_rng_state(benchmark_experiment) - inputs_list = 
self.prepare_inputs(benchmark_model.example_inputs, self._args.randomize_input) + inputs_list = self.prepare_inputs(benchmark_model.example_inputs, + self._args.randomize_input) reset_rng_state(benchmark_experiment) self._mark_step(benchmark_experiment) @@ -204,7 +207,8 @@ def timed_run(self, benchmark_experiment, benchmark_model): t_start = time.perf_counter() for i in range(self._args.iterations_per_run): - result = benchmark_model.model_iter_fn(inputs_list[i], collect_full_result=self._args.collect_full_result) + result = benchmark_model.model_iter_fn( + inputs_list[i], collect_full_result=self._args.collect_full_result) if benchmark_experiment.xla and self._args.iterations_per_run == 1: t_trace = time.perf_counter() @@ -224,103 +228,101 @@ def timed_run(self, benchmark_experiment, benchmark_model): def parse_args(args=None): - parser = argparse.ArgumentParser() - - parser.add_argument( - "--suite-name", - required=True, - choices=["dummy", "torchbench"], - help="Suite name for the model garden.", - ) - - parser.add_argument( - "--filter", "-k", action="append", help="filter benchmarks with regexp" - ) - parser.add_argument( - "--exclude", "-x", action="append", help="filter benchmarks with regexp" - ) - - parser.add_argument( - "--repeat", - type=int, - default=10, - help="Number of times to repeat the timed run in a single experiment.", - ) - - parser.add_argument( - "--iterations-per-run", - type=int, - default=1, - help="Number of times to repeat the model iteration inside a timed run.", - ) - - parser.add_argument( - "--batch-size", - type=int, - help="Batch size to be used. If not provided, it depends on the model suites to determine it.", - ) - - parser.add_argument( - "--total-partitions", - type=int, - default=1, - choices=range(1, 10), - help="Total number of partitions we want to divide the benchmark suite into", - ) - parser.add_argument( - "--partition-id", - type=int, - default=0, - help="ID of the benchmark suite partition to be run. Used to divide CI tasks", - ) - - parser.add_argument( - "--dry-run", - action="store_true", - help="Do a dry run to only print the benchmark commands.", - ) - - parser.add_argument( - "--randomize-input", - action="store_true", - help="Whether to randomize the input values. Dimensions will be kept the same.", - ) - - parser.add_argument( - "--collect-full-result", - action="store_true", - help="""Whether to collect full result for training. Set this to true if we + parser = argparse.ArgumentParser() + + parser.add_argument( + "--suite-name", + required=True, + choices=["dummy", "torchbench"], + help="Suite name for the model garden.", + ) + + parser.add_argument( + "--filter", "-k", action="append", help="filter benchmarks with regexp") + parser.add_argument( + "--exclude", "-x", action="append", help="filter benchmarks with regexp") + + parser.add_argument( + "--repeat", + type=int, + default=10, + help="Number of times to repeat the timed run in a single experiment.", + ) + + parser.add_argument( + "--iterations-per-run", + type=int, + default=1, + help="Number of times to repeat the model iteration inside a timed run.", + ) + + parser.add_argument( + "--batch-size", + type=int, + help="Batch size to be used. 
If not provided, it depends on the model suites to determine it.", + ) + + parser.add_argument( + "--total-partitions", + type=int, + default=1, + choices=range(1, 10), + help="Total number of partitions we want to divide the benchmark suite into", + ) + parser.add_argument( + "--partition-id", + type=int, + default=0, + help="ID of the benchmark suite partition to be run. Used to divide CI tasks", + ) + + parser.add_argument( + "--dry-run", + action="store_true", + help="Do a dry run to only print the benchmark commands.", + ) + + parser.add_argument( + "--randomize-input", + action="store_true", + help="Whether to randomize the input values. Dimensions will be kept the same.", + ) + + parser.add_argument( + "--collect-full-result", + action="store_true", + help="""Whether to collect full result for training. Set this to true if we want to verify the numerical correctness of graidents. But that may cause time measurement not accurate""", - ) + ) - parser.add_argument( - "--output-dirname", - type=str, - default="./output/", - help="Overrides the directory to place output files.", - ) + parser.add_argument( + "--output-dirname", + type=str, + default="./output/", + help="Overrides the directory to place output files.", + ) - parser.add_argument( - "--output-basename", - type=str, - default="results.csv", - help="Overrides the basename of output files.", - ) + parser.add_argument( + "--output-basename", + type=str, + default="results.csv", + help="Overrides the basename of output files.", + ) - parser.add_argument( - "--experiment-config", - type=str, - help="JSON string of the experiment config dict.", - ) + parser.add_argument( + "--experiment-config", + type=str, + help="JSON string of the experiment config dict.", + ) - parser.add_argument( - "--model-config", - type=str, - help="JSON string of the model config dict.", - ) + parser.add_argument( + "--model-config", + type=str, + help="JSON string of the model config dict.", + ) - return parser.parse_args(args) + return parser.parse_args(args) def main(): @@ -336,4 +338,4 @@ def main(): if __name__ == "__main__": logging.basicConfig(level=logging.INFO, force=True) - main() \ No newline at end of file + main() diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index ead1ca31f6c..fa609f28b76 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -17,7 +17,6 @@ from util import move_to_device, set_cwd from benchmark_model import ModelLoader, BenchmarkModel - logger = logging.getLogger(__name__) @@ -61,7 +60,7 @@ def list_model_configs(self): models = _list_model_paths() start, end = self.get_benchmark_indices(len(models)) - models = models[start: end] + models = models[start:end] for model_path in models: model_name = os.path.basename(model_path) @@ -103,16 +102,17 @@ def set_up(self): self.optimizer_class = torch.optim.Adam try: - module = importlib.import_module(f"torchbenchmark.models.{self.model_name}") + module = importlib.import_module( + f"torchbenchmark.models.{self.model_name}") except ModuleNotFoundError: - module = importlib.import_module(f"torchbenchmark.models.fb.{self.model_name}") + module = importlib.import_module( + f"torchbenchmark.models.fb.{self.model_name}") benchmark_cls = getattr(module, "Model", None) - cant_change_batch_size = ( - not getattr(benchmark_cls, "ALLOW_CUSTOMIZE_BSIZE", True) - ) + cant_change_batch_size = (not getattr(benchmark_cls, + "ALLOW_CUSTOMIZE_BSIZE", True)) if cant_change_batch_size: - self.benchmark_experiment.batch_size = None + 
self.benchmark_experiment.batch_size = None # workaround "RuntimeError: not allowed to set torch.backends.cudnn flags" # torch.backends.__allow_nonbracketed_mutation_flag = True @@ -138,7 +138,8 @@ def set_up(self): # Torchbench has quite different setup for yolov3, so directly passing # the right example_inputs if self.model_name == "yolov3": - self.example_inputs = (torch.rand(self.benchmark_experiment.batch_size, 3, 384, 512),) + self.example_inputs = (torch.rand(self.benchmark_experiment.batch_size, 3, + 384, 512),) del benchmark gc.collect() diff --git a/benchmarks/util.py b/benchmarks/util.py index 65d1aa592ca..000f42ea0ad 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -95,16 +95,14 @@ def randomize_input(inputs): return inputs else: raise RuntimeError( - f"randomize_input need support tensor of type {inputs.dtype}" - ) + f"randomize_input need support tensor of type {inputs.dtype}") elif isinstance(inputs, (list, tuple)): return type(inputs)([randomize_input(x) for x in inputs]) elif isinstance(inputs, dict): return dict((k, randomize_input(x)) for k, x in inputs.items()) else: logger.warning( - f"randomize_input can not handle input of type {type(inputs)}" - ) + f"randomize_input can not handle input of type {type(inputs)}") return inputs @@ -115,4 +113,4 @@ def set_cwd(path): try: yield finally: - os.chdir(original_dir) \ No newline at end of file + os.chdir(original_dir) From 9e1506edfd5c234346d1ae45448234b4b029a016 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Thu, 19 Jan 2023 01:12:41 +0000 Subject: [PATCH 10/31] add experiment name --- benchmarks/benchmark_experiment.py | 56 ++++++++++++++++++++---------- benchmarks/experiment_runner.py | 7 ++++ 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 03fbe8baafc..310e3b2b815 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -14,27 +14,39 @@ class ExperimentLoader: def __init__(self, args): self._args = args + self.experiment_name = self._args.experiment_name + + def expand_config_choices(self, config_choices): + configs = [{}] + + for key, choices in config_choices.items(): + tmp_configs = [] + for config in configs: + for choice in choices: + tmp_config = config.copy() + tmp_config[key] = choice + tmp_configs.append(tmp_config) + configs = tmp_configs + + return configs def list_experiment_configs(self): + if self.experiment_name == "run_all": + config_choices = { + "accelerator": ["cpu", "gpu", "tpu"], + "xla": [None, "PJRT", "XRT"], + "test": ["eval", "train"], + } + else: + raise NotImplementedError + experiment_configs = [] + for experiment_config in self.expand_config_choices(config_choices): + if not self.is_available(experiment_config): + continue - accelerators = ["cpu", "gpu", "tpu"] - xla_options = [None, "PJRT", "XRT"] - tests = ["eval", "train"] - for accelerator in accelerators: - for xla in xla_options: - for test in tests: - experiment_config = { - "accelerator": accelerator, - "xla": xla, - "test": test - } - - if not self.is_available(experiment_config): - continue - - self._add_experiment_env(experiment_config) - experiment_configs.append(experiment_config) + self._add_experiment_env(experiment_config) + experiment_configs.append(experiment_config) return experiment_configs def is_available(self, experiment_config): @@ -65,19 +77,25 @@ def _add_experiment_env(self, experiment_config): experiment_config["process_env"] = process_env def load_experiment(self, 
experiment_config): + experiment_name = self.experiment_name accelerator = experiment_config.get("accelerator", "cpu") xla = experiment_config.get("xla", None) test = experiment_config.get("test", "eval") batch_size = experiment_config.get("batch_size", self._args.batch_size) benchmark_experiment = BenchmarkExperiment( - accelerator=accelerator, xla=xla, test=test, batch_size=batch_size) + experiment_name=experiment_name, + accelerator=accelerator, + xla=xla, + test=test, + batch_size=batch_size) return benchmark_experiment class BenchmarkExperiment: - def __init__(self, accelerator, xla, test, batch_size): + def __init__(self, experiment_name, accelerator, xla, test, batch_size): + self.experiment_name = experiment_name self.accelerator = accelerator self.xla = xla self.test = test diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 822d7d5b13b..082d52843fb 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -242,6 +242,13 @@ def parse_args(args=None): parser.add_argument( "--exclude", "-x", action="append", help="filter benchmarks with regexp") + parser.add_argument( + "--experiment-name", + default="run_all", + choices=["run_all"], + help="Experiment name to run.", + ) + parser.add_argument( "--repeat", type=int, From 7591cbce5f97c9c5186bd6a8544ffd5abd2a100a Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Thu, 19 Jan 2023 23:12:06 +0000 Subject: [PATCH 11/31] add grad context for eval and train --- benchmarks/benchmark_experiment.py | 13 ++++++++++++- benchmarks/benchmark_model.py | 6 ++++++ benchmarks/experiment_runner.py | 29 +++++++++++++++++------------ benchmarks/torchbench_model.py | 6 ++---- benchmarks/util.py | 7 ++++++- 5 files changed, 43 insertions(+), 18 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 310e3b2b815..1ae7d0d20e5 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -7,6 +7,12 @@ except ImportError: from util import is_xla_device_available +try: + import torch_xla.core.xla_model as xm +except ImportError: + # ignore the error if torch_xla is not installed + pass + logger = logging.getLogger(__name__) @@ -73,6 +79,12 @@ def _add_experiment_env(self, experiment_config): process_env["XRT_TPU_CONFIG"] = "localservice;0;localhost:51011" elif is_xla_device_available("GPU"): process_env["GPU_NUM_DEVICES"] = "1" + elif not experiment_config["xla"] and is_xla_device_available( + experiment_config["accelerator"].upper()): + # In non-xla CPU training experiments, an env var is still needed if an + # xla device exists, or there will be "Missing XLA configuration" error. 
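As an aside, the environment plumbing in this hunk is easier to follow outside the loader. Below is a simplified, illustrative restatement; the helper name and its boolean argument are not part of the patch, and the real code keys the XRT branch off is_xla_device_available() rather than the requested accelerator.

import os

def build_process_env(accelerator, xla, xla_backend_available):
    # Return a modified copy of os.environ for the benchmark subprocess, or
    # None when the parent environment can be inherited unchanged.
    env = None
    if xla == "PJRT":
        env = os.environ.copy()
        env["PJRT_DEVICE"] = accelerator.upper()
    elif xla == "XRT":
        env = os.environ.copy()
        if accelerator == "tpu":
            env["XRT_TPU_CONFIG"] = "localservice;0;localhost:51011"
        elif accelerator == "gpu":
            env["GPU_NUM_DEVICES"] = "1"
    elif not xla and xla_backend_available:
        # A non-XLA run on a host where torch_xla can see a device still
        # needs PJRT_DEVICE, otherwise "Missing XLA configuration" is raised.
        env = os.environ.copy()
        env["PJRT_DEVICE"] = accelerator.upper()
    return env

The returned dict is stored in the experiment config and later passed as env= to the subprocess that runs the experiment.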
+ process_env = os.environ.copy() + process_env["PJRT_DEVICE"] = experiment_config["accelerator"].upper() experiment_config["process_env"] = process_env @@ -103,7 +115,6 @@ def __init__(self, experiment_name, accelerator, xla, test, batch_size): def get_device(self): if self.xla: - import torch_xla.core.xla_model as xm device = xm.xla_device(devkind=self.accelerator.upper()) elif self.accelerator == "cpu": device = torch.device("cpu") diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index c735000d9b0..20125ff8a9b 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -103,6 +103,12 @@ def prepare_for_experiment(self): else: raise NotImplementedError + def pick_grad(self): + if self.benchmark_experiment.test == "eval": + return torch.no_grad() + elif self.benchmark_experiment.test == "train": + return torch.enable_grad() + def optimizer_zero_grad(self): if self.optimizer is not None: self.optimizer.zero_grad(True) diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 082d52843fb..47cb2f34c67 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -24,6 +24,12 @@ from benchmark_experiment import ExperimentLoader from util import patch_torch_manual_seed, reset_rng_state, move_to_device, randomize_input +try: + import torch_xla.core.xla_model as xm +except ImportError: + # ignore the error if torch_xla is not installed + pass + logger = logging.getLogger(__name__) @@ -99,16 +105,17 @@ def run_single_experiment(self, experiment_config, model_config): benchmark_model = self.model_loader.load_model(model_config, benchmark_experiment) - timings = OrderedDict() - results = [] - for i in range(self._args.repeat): - timing, result = self.timed_run(benchmark_experiment, benchmark_model) - result = move_to_device(result, 'cpu') - results.append(result) - for key, val in timing.items(): - if i == 0: - timings[key] = np.zeros(self._args.repeat, np.float64) - timings[key][i] = val + with benchmark_model.pick_grad(): + timings = OrderedDict() + results = [] + for i in range(self._args.repeat): + timing, result = self.timed_run(benchmark_experiment, benchmark_model) + result = move_to_device(result, 'cpu') + results.append(result) + for key, val in timing.items(): + if i == 0: + timings[key] = np.zeros(self._args.repeat, np.float64) + timings[key][i] = val # print(timings) self.save_results(benchmark_experiment, benchmark_model, timings, results) @@ -172,12 +179,10 @@ def output_csv(self, headers, row, file_path=None): def _mark_step(self, benchmark_experiment): if benchmark_experiment.xla: - import torch_xla.core.xla_model as xm xm.mark_step() def _synchronize(self, benchmark_experiment): if benchmark_experiment.xla: - import torch_xla.core.xla_model as xm xm.wait_device_ops() elif benchmark_experiment.accelerator == "gpu": torch.cuda.synchronize() diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index fa609f28b76..3ccdb6df778 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -117,12 +117,10 @@ def set_up(self): # workaround "RuntimeError: not allowed to set torch.backends.cudnn flags" # torch.backends.__allow_nonbracketed_mutation_flag = True - if self.benchmark_experiment.accelerator == "cpu": - device = "cpu" - elif self.benchmark_experiment.accelerator == "gpu" and not self.benchmark_experiment.xla: + if self.benchmark_experiment.accelerator == "gpu" and not self.benchmark_experiment.xla: device = "cuda" else: - device = 
str(self.benchmark_experiment.get_device()) + device = "cpu" benchmark = benchmark_cls( test=self.benchmark_experiment.test, diff --git a/benchmarks/util.py b/benchmarks/util.py index 000f42ea0ad..b18ccc559ee 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -10,6 +10,12 @@ import torch import traceback +try: + import torch_xla.core.xla_model as xm +except ImportError: + # ignore the error if torch_xla is not installed + pass + logger = logging.getLogger(__name__) @@ -35,7 +41,6 @@ def reset_rng_state(benchmark_experiment=None): random.seed(1337) np.random.seed(1337) if benchmark_experiment and benchmark_experiment.xla: - import torch_xla.core.xla_model as xm device = benchmark_experiment.get_device() xm.set_rng_state(1337, str(device)) From 6e9a3273134ca1a7f06cbeae0d4f4e1aa8d578b9 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Fri, 20 Jan 2023 01:11:58 +0000 Subject: [PATCH 12/31] minor user config update --- benchmarks/benchmark_experiment.py | 16 ++++++++++--- benchmarks/experiment_runner.py | 37 ++++++++++++++++++++++++------ benchmarks/torchbench_model.py | 6 +++-- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 1ae7d0d20e5..a3cef50791d 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -43,6 +43,16 @@ def list_experiment_configs(self): "xla": [None, "PJRT", "XRT"], "test": ["eval", "train"], } + + if self._args.accelerator: + config_choices["accelerator"] = [self._args.accelerator] + if self._args.xla: + if self._args.xla == "None": + config_choices["xla"] = [None] + else: + config_choices["xla"] = [self._args.xla] + if self._args.test: + config_choices["test"] = [self._args.test] else: raise NotImplementedError @@ -90,9 +100,9 @@ def _add_experiment_env(self, experiment_config): def load_experiment(self, experiment_config): experiment_name = self.experiment_name - accelerator = experiment_config.get("accelerator", "cpu") - xla = experiment_config.get("xla", None) - test = experiment_config.get("test", "eval") + accelerator = experiment_config["accelerator"] + xla = experiment_config["xla"] + test = experiment_config["test"] batch_size = experiment_config.get("batch_size", self._args.batch_size) benchmark_experiment = BenchmarkExperiment( experiment_name=experiment_name, diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 47cb2f34c67..7c14d08aa73 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -131,7 +131,7 @@ def save_results(self, benchmark_experiment, benchmark_model, timings, "test", "batch_size", "median_total", - "median_average", + "median_per_iter", "detail_results", ] @@ -143,7 +143,7 @@ def save_results(self, benchmark_experiment, benchmark_model, timings, benchmark_experiment.test, benchmark_experiment.batch_size, np.median(timings["total"]).item(), - np.median(timings["average"]).item(), + np.median(timings["per_iter"]).item(), detail_file_name, ] @@ -210,13 +210,18 @@ def timed_run(self, benchmark_experiment, benchmark_model): timing = OrderedDict() t_start = time.perf_counter() + if benchmark_experiment.xla: + t_trace = 0 for i in range(self._args.iterations_per_run): + if benchmark_experiment.xla: + t_trace_start = time.perf_counter() + result = benchmark_model.model_iter_fn( inputs_list[i], collect_full_result=self._args.collect_full_result) - if benchmark_experiment.xla and self._args.iterations_per_run == 1: - t_trace = time.perf_counter() + if 
benchmark_experiment.xla: + t_trace += time.perf_counter() - t_trace_start self._mark_step(benchmark_experiment) @@ -225,9 +230,9 @@ def timed_run(self, benchmark_experiment, benchmark_model): t_end = time.perf_counter() timing["total"] = t_end - t_start - timing["average"] = timing["total"] / self._args.iterations_per_run - if benchmark_experiment.xla and self._args.iterations_per_run == 1: - timing["trace"] = t_trace - t_start + timing["per_iter"] = timing["total"] / self._args.iterations_per_run + if benchmark_experiment.xla: + timing["trace_per_iter"] = t_trace / self._args.iterations_per_run return timing, result @@ -254,6 +259,24 @@ def parse_args(args=None): help="Experiment name to run.", ) + parser.add_argument( + "--accelerator", + choices=["cpu", "gpu", "tpu"], + help="Specify an accelerator to use.", + ) + + parser.add_argument( + "--xla", + choices=["None", "PJRT", "XRT"], + help="Specify an xla option to use.", + ) + + parser.add_argument( + "--test", + choices=["eval", "train"], + help="Specify a test to run.", + ) + parser.add_argument( "--repeat", type=int, diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index 3ccdb6df778..fa609f28b76 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -117,10 +117,12 @@ def set_up(self): # workaround "RuntimeError: not allowed to set torch.backends.cudnn flags" # torch.backends.__allow_nonbracketed_mutation_flag = True - if self.benchmark_experiment.accelerator == "gpu" and not self.benchmark_experiment.xla: + if self.benchmark_experiment.accelerator == "cpu": + device = "cpu" + elif self.benchmark_experiment.accelerator == "gpu" and not self.benchmark_experiment.xla: device = "cuda" else: - device = "cpu" + device = str(self.benchmark_experiment.get_device()) benchmark = benchmark_cls( test=self.benchmark_experiment.test, From 1939fe08404f6527d1ab53f694cb2ebd48b90eb1 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Fri, 20 Jan 2023 23:20:14 +0000 Subject: [PATCH 13/31] fix train() return item --- benchmarks/benchmark_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 20125ff8a9b..2fcd191dc5d 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -130,7 +130,7 @@ def train(self, inputs, collect_full_result=False): self.optimizer_step() if collect_full_result: return collect_results(self.module, pred, loss, inputs) - return pred + return loss.detach() def eval(self, inputs, collect_full_result=False): pred = self.module(*inputs) From 8a50910d9f4fa34501d799e5488e2894da512bac Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Sat, 21 Jan 2023 01:39:05 +0000 Subject: [PATCH 14/31] minor refactor --- benchmarks/benchmark_model.py | 3 ++- benchmarks/torchbench_model.py | 16 +--------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 2fcd191dc5d..38feac80cac 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -18,6 +18,7 @@ class ModelLoader: def __init__(self, args): self._args = args self.suite_name = self._args.suite_name + self.benchmark_model_class = BenchmarkModel def list_model_configs(self): model_configs = [ @@ -46,7 +47,7 @@ def skip_model(self, model_name): def load_model(self, model_config, benchmark_experiment): suite_name = self.suite_name model_name = model_config["model_name"] - benchmark_model = BenchmarkModel( + benchmark_model = 
self.benchmark_model_class( suite_name=suite_name, model_name=model_name, benchmark_experiment=benchmark_experiment, diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index fa609f28b76..0535a50e397 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -24,7 +24,7 @@ class TorchBenchModelLoader(ModelLoader): def __init__(self, args): super().__init__(args) - + self.benchmark_model_class = TorchBenchModel self.torchbench_dir = self.add_torchbench_dir() def add_torchbench_dir(self): @@ -74,20 +74,6 @@ def list_model_configs(self): def is_compatible(self, model_config, experiment_config): return True - def load_model(self, model_config, benchmark_experiment): - suite_name = self.suite_name - model_name = model_config["model_name"] - benchmark_model = TorchBenchModel( - suite_name=suite_name, - model_name=model_name, - benchmark_experiment=benchmark_experiment, - ) - - benchmark_model.set_up() - benchmark_model.prepare_for_experiment() - - return benchmark_model - class TorchBenchModel(BenchmarkModel): From bacda048b374232d089db1f9492e9bfde2ba0d30 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Mon, 23 Jan 2023 23:09:41 +0000 Subject: [PATCH 15/31] add dynamo options --- benchmarks/benchmark_experiment.py | 42 ++++++++++++++++++++++++------ benchmarks/benchmark_model.py | 5 ++++ benchmarks/experiment_runner.py | 12 +++++++++ 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index a3cef50791d..2ec155d2168 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -1,6 +1,7 @@ import logging import os import torch +import torch._dynamo as dynamo try: from .util import is_xla_device_available @@ -41,18 +42,26 @@ def list_experiment_configs(self): config_choices = { "accelerator": ["cpu", "gpu", "tpu"], "xla": [None, "PJRT", "XRT"], + "dynamo": [ + None, "inductor", "torchxla_trace_once", "aot_torchxla_trace_once" + ], "test": ["eval", "train"], } if self._args.accelerator: - config_choices["accelerator"] = [self._args.accelerator] + config_choices["accelerator"] = list(set(self._args.accelerator)) if self._args.xla: - if self._args.xla == "None": - config_choices["xla"] = [None] - else: - config_choices["xla"] = [self._args.xla] + config_choices["xla"] = list(set(self._args.xla)) + config_choices["xla"] = [ + x if x != "None" else None for x in config_choices["xla"] + ] + if self._args.dynamo: + config_choices["dynamo"] = list(set(self._args.dynamo)) + config_choices["dynamo"] = [ + x if x != "None" else None for x in config_choices["dynamo"] + ] if self._args.test: - config_choices["test"] = [self._args.test] + config_choices["test"] = list(set(self._args.test)) else: raise NotImplementedError @@ -66,6 +75,19 @@ def list_experiment_configs(self): return experiment_configs def is_available(self, experiment_config): + if experiment_config["dynamo"] and experiment_config[ + "dynamo"] not in dynamo.list_backends(): + return False + if experiment_config["dynamo"] == "inductor" and not ( + experiment_config["accelerator"] == "gpu" and + not experiment_config["xla"]): + return False + if experiment_config["dynamo"] == "torchxla_trace_once" and not ( + experiment_config["xla"] and experiment_config["test"] == "eval"): + return False + if experiment_config["dynamo"] == "aot_torchxla_trace_once" and not ( + experiment_config["xla"] and experiment_config["test"] == "train"): + return False if (experiment_config["xla"] and not 
is_xla_device_available(experiment_config["accelerator"].upper())): return False @@ -102,12 +124,14 @@ def load_experiment(self, experiment_config): experiment_name = self.experiment_name accelerator = experiment_config["accelerator"] xla = experiment_config["xla"] + dynamo = experiment_config["dynamo"] test = experiment_config["test"] batch_size = experiment_config.get("batch_size", self._args.batch_size) benchmark_experiment = BenchmarkExperiment( experiment_name=experiment_name, accelerator=accelerator, xla=xla, + dynamo=dynamo, test=test, batch_size=batch_size) @@ -116,10 +140,12 @@ def load_experiment(self, experiment_config): class BenchmarkExperiment: - def __init__(self, experiment_name, accelerator, xla, test, batch_size): + def __init__(self, experiment_name, accelerator, xla, dynamo, test, + batch_size): self.experiment_name = experiment_name self.accelerator = accelerator self.xla = xla + self.dynamo = dynamo self.test = test self.batch_size = batch_size @@ -137,4 +163,4 @@ def get_device(self): @property def filename_str(self): - return f"{self.accelerator}-{self.xla}-{self.test}-{self.batch_size}" + return f"{self.accelerator}-{self.xla}-{self.dynamo}-{self.test}-{self.batch_size}" diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 38feac80cac..2afe2dfc08b 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -2,6 +2,7 @@ import re import torch import torch.nn as nn +import torch._dynamo as dynamo from torch._dynamo.testing import collect_results, reduce_to_scalar_loss import types @@ -104,6 +105,10 @@ def prepare_for_experiment(self): else: raise NotImplementedError + if self.benchmark_experiment.dynamo: + self.model_iter_fn = dynamo.optimize(self.benchmark_experiment.dynamo)( + self.model_iter_fn) + def pick_grad(self): if self.benchmark_experiment.test == "eval": return torch.no_grad() diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 7c14d08aa73..587bd4e4b0e 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -262,18 +262,30 @@ def parse_args(args=None): parser.add_argument( "--accelerator", choices=["cpu", "gpu", "tpu"], + action="append", help="Specify an accelerator to use.", ) parser.add_argument( "--xla", choices=["None", "PJRT", "XRT"], + action="append", + help="Specify an xla option to use.", + ) + + parser.add_argument( + "--dynamo", + choices=[ + "None", "inductor", "torchxla_trace_once", "aot_torchxla_trace_once" + ], + action="append", help="Specify an xla option to use.", ) parser.add_argument( "--test", choices=["eval", "train"], + action="append", help="Specify a test to run.", ) From 0bc642ef8854117fe6ae0b9645a7f6ce3f44e692 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Mon, 23 Jan 2023 23:57:14 +0000 Subject: [PATCH 16/31] add column in result for dynamo setting --- benchmarks/experiment_runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 587bd4e4b0e..ada5531a5c0 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -128,6 +128,7 @@ def save_results(self, benchmark_experiment, benchmark_model, timings, "model_name", "accelerator", "xla", + "dynamo", "test", "batch_size", "median_total", @@ -140,6 +141,7 @@ def save_results(self, benchmark_experiment, benchmark_model, timings, benchmark_model.model_name, benchmark_experiment.accelerator, benchmark_experiment.xla, + benchmark_experiment.dynamo, benchmark_experiment.test, 
benchmark_experiment.batch_size, np.median(timings["total"]).item(), From 38511809dd465a2ad850626617ccbb2418f21fa8 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Tue, 24 Jan 2023 23:18:23 +0000 Subject: [PATCH 17/31] using to capture output and error --- benchmarks/experiment_runner.py | 53 ++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index ada5531a5c0..10d924d1727 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -11,7 +11,7 @@ import sys import time import torch -import types +from tqdm import tqdm try: from .benchmark_model import ModelLoader @@ -55,7 +55,7 @@ def __init__(self, args): def run(self): if self._args.experiment_config and self._args.model_config: if self._args.dry_run: - logger.info(f"Dry run with {[sys.executable] + sys.argv}") + logger.warning(f"Dry run with {[sys.executable] + sys.argv}") return experiment_config = json.loads(self._args.experiment_config) model_config = json.loads(self._args.model_config) @@ -67,36 +67,55 @@ def run(self): experiment_configs = self.experiment_loader.list_experiment_configs() model_configs = self.model_loader.list_model_configs() - for model_config in model_configs: + logger.warning( + f"Number of selected experiment configs: {len(experiment_configs)}") + logger.warning(f"Number of selected model configs: {len(model_configs)}") + for model_config in tqdm( + model_configs, + desc="model configs", + disable=not self._args.progress_bar): for experiment_config in experiment_configs: + process_env = experiment_config.pop("process_env") + experiment_config_str = json.dumps(experiment_config) + model_config_str = json.dumps(model_config) + experiment_config["process_env"] = process_env if self.model_loader.is_compatible(model_config, experiment_config): - process_env = experiment_config.pop("process_env") - experiment_config_str = json.dumps(experiment_config) - model_config_str = json.dumps(model_config) - experiment_config["process_env"] = process_env command = ([sys.executable] + sys.argv + [f"--experiment-config={experiment_config_str}"] + [f"--model-config={model_config_str}"]) if self._args.dry_run: - logger.info(f"Dry run with {command}") + logger.warning(f"Dry run with {command}") continue try: - subprocess.check_call( + completed_process = subprocess.run( command, timeout=60 * 20, env=process_env, + check=True, + capture_output=True, ) except subprocess.TimeoutExpired as e: logger.error("TIMEOUT") self.record_failed_experiment(model_config_str, experiment_config_str, e) + except subprocess.CalledProcessError as e: + logger.error("ERROR") + self.record_failed_experiment(model_config_str, + experiment_config_str, e.stderr) except subprocess.SubprocessError as e: logger.error("ERROR") self.record_failed_experiment(model_config_str, experiment_config_str, e) + else: + if self._args.print_subprocess: + logger.info(completed_process.stdout) + logger.warning(completed_process.stderr) else: - logger.warning("SKIP because of incompatible configs.") + e = "SKIP because of incompatible model and experiment configs." 
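For readability, the launch-and-record pattern that this commit converts from subprocess.check_call to subprocess.run can be sketched as a standalone helper; the wrapper name and return convention are illustrative, while the timeout and exception handling mirror the hunk above.

import subprocess

def launch_experiment(command, process_env, timeout_s=60 * 20):
    # Each (model config, experiment config) pair runs in its own child
    # process, so a crash, OOM, or hang only loses that single data point.
    try:
        completed = subprocess.run(
            command,
            env=process_env,
            timeout=timeout_s,
            check=True,            # non-zero exit raises CalledProcessError
            capture_output=True,   # keep stdout/stderr for later logging
        )
    except subprocess.TimeoutExpired as e:
        return None, e             # recorded as a failed experiment
    except subprocess.CalledProcessError as e:
        return None, e.stderr      # stderr carries the child's traceback
    except subprocess.SubprocessError as e:
        return None, e
    return completed, None

On success the caller can log completed.stdout and completed.stderr when --print-subprocess is given; on failure the second element of the tuple is written out together with the model and experiment configs.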
+ logger.warning(e) + self.record_failed_experiment(model_config_str, + experiment_config_str, e) def run_single_experiment(self, experiment_config, model_config): benchmark_experiment = self.experiment_loader.load_experiment( @@ -331,6 +350,18 @@ def parse_args(args=None): help="Do a dry run to only print the benchmark commands.", ) + parser.add_argument( + "--print-subprocess", + action="store_true", + help="Print subprocess stdout.", + ) + + parser.add_argument( + "--progress-bar", + action="store_true", + help="Display progress bar.", + ) + parser.add_argument( "--randomize-input", action="store_true", @@ -386,5 +417,5 @@ def main(): if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, force=True) + logging.basicConfig(level=logging.WARNING, force=True) main() From 83c674ca57964639810e3c53adb31b3a72c83018 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Wed, 25 Jan 2023 18:41:01 +0000 Subject: [PATCH 18/31] Fix some failure cases for dynamo --- benchmarks/benchmark_model.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 2afe2dfc08b..056a9dbb3d8 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -101,7 +101,14 @@ def prepare_for_experiment(self): elif self.benchmark_experiment.test == "train": self.module.train() self.model_iter_fn = self.train - self.optimizer = self.optimizer_class(self.module.parameters(), lr=0.01) + if self.benchmark_experiment.dynamo == "aot_torchxla_trace_once": + # TODO: dynamo aot_torchxla_trace_once would fail if there is an + # optimizer. + # This makes the aot_torchxla_trace_once results not comparable + # with other training results + self.optimizer = None + else: + self.optimizer = self.optimizer_class(self.module.parameters(), lr=0.01) else: raise NotImplementedError @@ -136,7 +143,9 @@ def train(self, inputs, collect_full_result=False): self.optimizer_step() if collect_full_result: return collect_results(self.module, pred, loss, inputs) - return loss.detach() + # return loss.detach() + # TODO: dynamo inductor would fail if .detach() is used + return None def eval(self, inputs, collect_full_result=False): pred = self.module(*inputs) From 113830f9ef386845c4f0157e9eb71dea0f53d482 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Thu, 26 Jan 2023 18:27:16 +0000 Subject: [PATCH 19/31] reduce eval result size by returning eval loss --- benchmarks/benchmark_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 056a9dbb3d8..f12afa1ac67 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -149,7 +149,10 @@ def train(self, inputs, collect_full_result=False): def eval(self, inputs, collect_full_result=False): pred = self.module(*inputs) - return pred + loss = self.compute_loss(pred) + if collect_full_result: + return pred, loss + return loss @property def filename_str(self): From de609b8128a114b7b9def432c26c6feb4e6f6c8b Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Thu, 26 Jan 2023 22:09:53 +0000 Subject: [PATCH 20/31] minor refactor --- benchmarks/benchmark_model.py | 4 ++-- benchmarks/torchbench_model.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index f12afa1ac67..24000a9b834 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -3,7 +3,7 @@ import torch import 
torch.nn as nn import torch._dynamo as dynamo -from torch._dynamo.testing import collect_results, reduce_to_scalar_loss +from torch._dynamo.testing import collect_results import types try: @@ -133,7 +133,7 @@ def optimizer_step(self): self.optimizer.step() def compute_loss(self, pred): - return reduce_to_scalar_loss(pred) + raise NotImplementedError def train(self, inputs, collect_full_result=False): self.optimizer_zero_grad() diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index 0535a50e397..c5d34aef849 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -129,3 +129,24 @@ def set_up(self): del benchmark gc.collect() + + def compute_loss(self, pred): + """Reduce the output of a model to get scalar loss""" + if isinstance(pred, torch.Tensor): + # Mean does not work on integer tensors + return pred.sum() / pred.numel() + elif isinstance(pred, (list, tuple)): + return sum([reduce_to_scalar_loss(x) for x in pred]) / len(pred) + elif type(pred).__name__ in ( + "MaskedLMOutput", + "Seq2SeqLMOutput", + "CausalLMOutputWithCrossAttentions", + ): + return reduce_to_scalar_loss(pred.logits) + elif type(pred).__name__ == "SquashedNormal": + return pred.mean.sum() + elif isinstance(pred, dict): + return sum([reduce_to_scalar_loss(value) for value in pred.values()]) / len( + pred.keys() + ) + raise NotImplementedError("Don't know how to reduce", type(pred)) From 2d94836133ea3a8a68aa5baacc05d49845bca93d Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Thu, 26 Jan 2023 22:27:52 +0000 Subject: [PATCH 21/31] revert eval result change --- benchmarks/benchmark_model.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 24000a9b834..91b9072720d 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -149,10 +149,7 @@ def train(self, inputs, collect_full_result=False): def eval(self, inputs, collect_full_result=False): pred = self.module(*inputs) - loss = self.compute_loss(pred) - if collect_full_result: - return pred, loss - return loss + return pred @property def filename_str(self): From c2ad2781ebdf212fc6e4e497c2ba5f6cc9987bd4 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Mon, 6 Feb 2023 21:17:08 +0000 Subject: [PATCH 22/31] minor fix --- benchmarks/experiment_runner.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 10d924d1727..ba46c399cac 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -79,13 +79,13 @@ def run(self): experiment_config_str = json.dumps(experiment_config) model_config_str = json.dumps(model_config) experiment_config["process_env"] = process_env + command = ([sys.executable] + sys.argv + + [f"--experiment-config={experiment_config_str}"] + + [f"--model-config={model_config_str}"]) + if self._args.dry_run: + logger.warning(f"Dry run with {command}") + continue if self.model_loader.is_compatible(model_config, experiment_config): - command = ([sys.executable] + sys.argv + - [f"--experiment-config={experiment_config_str}"] + - [f"--model-config={model_config_str}"]) - if self._args.dry_run: - logger.warning(f"Dry run with {command}") - continue try: completed_process = subprocess.run( command, From 88fee741c71d2aa259e5548ed81f45a9bb1ca895 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Fri, 10 Feb 2023 00:13:15 +0000 Subject: [PATCH 23/31] Change output format to jsonl --- 
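This commit switches the results format to JSON Lines: one JSON object per experiment instead of a CSV row plus a torch.save'd detail file. The output_jsonl helper itself falls outside this excerpt, so the snippet below is only a plausible sketch of an append-only JSONL writer and a matching reader.

import json

def output_jsonl(obj, file_path):
    # Append one experiment record as a single JSON line; files written by
    # different partitions can simply be concatenated afterwards.
    with open(file_path, mode="a", encoding="utf-8") as f:
        f.write(json.dumps(obj) + "\n")

def read_jsonl(file_path):
    # Convenience reader for post-processing the benchmark results.
    with open(file_path, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]

Each record merges BenchmarkModel.to_dict() and BenchmarkExperiment.to_dict() with the repeat and iterations_per_run settings, the per-repeat metric lists, the optional outputs_file name, and, for failures, an error field.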
benchmarks/benchmark_experiment.py | 13 ++- benchmarks/benchmark_model.py | 22 ++-- benchmarks/experiment_runner.py | 167 ++++++++++++++++------------- benchmarks/torchbench_model.py | 3 - 4 files changed, 120 insertions(+), 85 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 2ec155d2168..61cb08653c2 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -1,3 +1,4 @@ +from collections import OrderedDict import logging import os import torch @@ -120,7 +121,7 @@ def _add_experiment_env(self, experiment_config): experiment_config["process_env"] = process_env - def load_experiment(self, experiment_config): + def load_experiment(self, experiment_config, dummy=False): experiment_name = self.experiment_name accelerator = experiment_config["accelerator"] xla = experiment_config["xla"] @@ -164,3 +165,13 @@ def get_device(self): @property def filename_str(self): return f"{self.accelerator}-{self.xla}-{self.dynamo}-{self.test}-{self.batch_size}" + + def to_dict(self): + d = OrderedDict() + d["experiment_name"] = self.experiment_name + d["accelerator"] = self.accelerator + d["xla"] = self.xla + d["dynamo"] = self.dynamo + d["test"] = self.test + d["batch_size"] = self.batch_size + return d diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 91b9072720d..5be78fbd43e 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -1,3 +1,4 @@ +from collections import OrderedDict import logging import re import torch @@ -30,7 +31,7 @@ def list_model_configs(self): return model_configs - def is_compatible(self, model_config, experiment_config): + def is_compatible(self, dummy_benchmark_model, benchmark_experiment): return True def get_benchmark_indices(self, length): @@ -45,7 +46,7 @@ def skip_model(self, model_name): return (not re.search("|".join(self._args.filter), model_name, re.I) or re.search("|".join(self._args.exclude), model_name, re.I)) - def load_model(self, model_config, benchmark_experiment): + def load_model(self, model_config, benchmark_experiment, dummy=False): suite_name = self.suite_name model_name = model_config["model_name"] benchmark_model = self.benchmark_model_class( @@ -54,8 +55,9 @@ def load_model(self, model_config, benchmark_experiment): benchmark_experiment=benchmark_experiment, ) - benchmark_model.set_up() - benchmark_model.prepare_for_experiment() + if not dummy: + benchmark_model.set_up() + benchmark_model.prepare_for_experiment() return benchmark_model @@ -135,22 +137,28 @@ def optimizer_step(self): def compute_loss(self, pred): raise NotImplementedError - def train(self, inputs, collect_full_result=False): + def train(self, inputs, collect_full_output=False): self.optimizer_zero_grad() pred = self.module(*inputs) loss = self.compute_loss(pred) loss.backward() self.optimizer_step() - if collect_full_result: + if collect_full_output: return collect_results(self.module, pred, loss, inputs) # return loss.detach() # TODO: dynamo inductor would fail if .detach() is used return None - def eval(self, inputs, collect_full_result=False): + def eval(self, inputs, collect_full_output=False): pred = self.module(*inputs) return pred @property def filename_str(self): return f"{self.suite_name}-{self.model_name}" + + def to_dict(self): + d = OrderedDict() + d["suite_name"] = self.suite_name + d["model_name"] = self.model_name + return d diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index ba46c399cac..7363e2628dc 100644 
--- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -78,6 +78,10 @@ def run(self): process_env = experiment_config.pop("process_env") experiment_config_str = json.dumps(experiment_config) model_config_str = json.dumps(model_config) + dummy_benchmark_experiment = self.experiment_loader.load_experiment( + experiment_config, dummy=True) + dummy_benchmark_model = self.model_loader.load_model(model_config, + dummy_benchmark_experiment, dummy=True) experiment_config["process_env"] = process_env command = ([sys.executable] + sys.argv + [f"--experiment-config={experiment_config_str}"] + @@ -85,7 +89,7 @@ def run(self): if self._args.dry_run: logger.warning(f"Dry run with {command}") continue - if self.model_loader.is_compatible(model_config, experiment_config): + if self.model_loader.is_compatible(dummy_benchmark_model, dummy_benchmark_experiment): try: completed_process = subprocess.run( command, @@ -93,19 +97,20 @@ def run(self): env=process_env, check=True, capture_output=True, + encoding="utf-8", ) except subprocess.TimeoutExpired as e: logger.error("TIMEOUT") - self.record_failed_experiment(model_config_str, - experiment_config_str, e) + self.record_failed_experiment(dummy_benchmark_model, + dummy_benchmark_experiment, e) except subprocess.CalledProcessError as e: logger.error("ERROR") - self.record_failed_experiment(model_config_str, - experiment_config_str, e.stderr) + self.record_failed_experiment(dummy_benchmark_model, + dummy_benchmark_experiment, e.stderr) except subprocess.SubprocessError as e: logger.error("ERROR") - self.record_failed_experiment(model_config_str, - experiment_config_str, e) + self.record_failed_experiment(dummy_benchmark_model, + dummy_benchmark_experiment, e) else: if self._args.print_subprocess: logger.info(completed_process.stdout) @@ -114,8 +119,8 @@ def run(self): else: e = "SKIP because of incompatible model and experiment configs." 
logger.warning(e) - self.record_failed_experiment(model_config_str, - experiment_config_str, e) + self.record_failed_experiment(dummy_benchmark_model, + dummy_benchmark_experiment, e) def run_single_experiment(self, experiment_config, model_config): benchmark_experiment = self.experiment_loader.load_experiment( @@ -125,62 +130,56 @@ def run_single_experiment(self, experiment_config, model_config): benchmark_experiment) with benchmark_model.pick_grad(): - timings = OrderedDict() - results = [] + metrics = OrderedDict() + outputs = [] for i in range(self._args.repeat): - timing, result = self.timed_run(benchmark_experiment, benchmark_model) - result = move_to_device(result, 'cpu') - results.append(result) - for key, val in timing.items(): + run_metrics, output = self.timed_run(benchmark_experiment, benchmark_model) + output = move_to_device(output, 'cpu') + outputs.append(output) + for key, val in run_metrics.items(): + # metrics from repeated runs are formed into lists in the metrics dict if i == 0: - timings[key] = np.zeros(self._args.repeat, np.float64) - timings[key][i] = val - # print(timings) - - self.save_results(benchmark_experiment, benchmark_model, timings, results) - - def save_results(self, benchmark_experiment, benchmark_model, timings, - results): - detail_file_name = f"{benchmark_model.filename_str}-{benchmark_experiment.filename_str}.pt" - csv_headers = [ - "suite_name", - "model_name", - "accelerator", - "xla", - "dynamo", - "test", - "batch_size", - "median_total", - "median_per_iter", - "detail_results", - ] - - csv_row = [ - benchmark_model.suite_name, - benchmark_model.model_name, - benchmark_experiment.accelerator, - benchmark_experiment.xla, - benchmark_experiment.dynamo, - benchmark_experiment.test, - benchmark_experiment.batch_size, - np.median(timings["total"]).item(), - np.median(timings["per_iter"]).item(), - detail_file_name, - ] - - self.output_csv(csv_headers, csv_row) - - torch.save({ - "timings": timings, - "results": results - }, os.path.join(self.output_dir, detail_file_name)) - - def record_failed_experiment(self, model_config_str, experiment_config_str, - e): - headers = ["model_config", "experiment_config", "failure"] - row = [model_config_str, experiment_config_str, e] - file_path = os.path.join(self.output_dir, "failed_experiments.csv") - self.output_csv(headers, row, file_path) + metrics[key] = [] + metrics[key].append(val) + + self.save_results(benchmark_experiment, benchmark_model, metrics, outputs) + + def save_results(self, benchmark_experiment, benchmark_model, metrics, + outputs): + if self._args.save_output and outputs is not None: + outputs_file_name = f"{benchmark_model.filename_str}-{benchmark_experiment.filename_str}.pt" + torch.save(outputs, os.path.join(self.output_dir, outputs_file_name)) + else: + outputs_file_name = None + + results = OrderedDict() + results.update(benchmark_model.to_dict()) + results.update(benchmark_experiment.to_dict()) + results["repeat"] = self._args.repeat + results["iterations_per_run"] = self._args.iterations_per_run + + results.update(metrics) + results["outputs_file"] = outputs_file_name + + self.output_jsonl(results) + + def record_failed_experiment(self, dummy_benchmark_model, dummy_benchmark_experiment, e): + results = OrderedDict() + results.update(dummy_benchmark_model.to_dict()) + results.update(dummy_benchmark_experiment.to_dict()) + results["repeat"] = self._args.repeat + results["iterations_per_run"] = self._args.iterations_per_run + + results["error"] = e + + self.output_jsonl(results) + + def 
output_jsonl(self, obj, file_path=None): + if not file_path: + file_path = self.output_file + json_str = json.dumps(obj, ensure_ascii=False) + with open(file_path, mode="a", encoding="utf-8") as f: + f.write(f"{json_str}\n") def output_csv(self, headers, row, file_path=None): if not file_path: @@ -229,7 +228,7 @@ def timed_run(self, benchmark_experiment, benchmark_model): self._mark_step(benchmark_experiment) self._synchronize(benchmark_experiment) - timing = OrderedDict() + metrics = OrderedDict() t_start = time.perf_counter() if benchmark_experiment.xla: t_trace = 0 @@ -238,8 +237,8 @@ def timed_run(self, benchmark_experiment, benchmark_model): if benchmark_experiment.xla: t_trace_start = time.perf_counter() - result = benchmark_model.model_iter_fn( - inputs_list[i], collect_full_result=self._args.collect_full_result) + output = benchmark_model.model_iter_fn( + inputs_list[i], collect_full_output=self._args.collect_full_output) if benchmark_experiment.xla: t_trace += time.perf_counter() - t_trace_start @@ -250,12 +249,12 @@ def timed_run(self, benchmark_experiment, benchmark_model): t_end = time.perf_counter() - timing["total"] = t_end - t_start - timing["per_iter"] = timing["total"] / self._args.iterations_per_run + metrics["total_time"] = t_end - t_start + metrics["per_iter_time"] = metrics["total_time"] / self._args.iterations_per_run if benchmark_experiment.xla: - timing["trace_per_iter"] = t_trace / self._args.iterations_per_run + metrics["trace_per_iter_time"] = t_trace / self._args.iterations_per_run - return timing, result + return metrics, output def parse_args(args=None): @@ -273,6 +272,13 @@ def parse_args(args=None): parser.add_argument( "--exclude", "-x", action="append", help="filter benchmarks with regexp") + parser.add_argument( + "--log-level", + default="warning", + choices=["info", "warning"], + help="Specify the logging level.", + ) + parser.add_argument( "--experiment-name", default="run_all", @@ -369,13 +375,19 @@ def parse_args(args=None): ) parser.add_argument( - "--collect-full-result", + "--collect-full-output", action="store_true", - help="""Whether to collect full result for training. Set this to true if we + help="""Whether to collect full output for training. Set this to true if we want to verify the numerical correctness of gradients.
But that may make the time measurement inaccurate""", ) + parser.add_argument( + "--save-output", + action="store_true", + help="Whether to save the output to disk", + ) + parser.add_argument( "--output-dirname", type=str, @@ -386,7 +398,7 @@ def parse_args(args=None): parser.add_argument( "--output-basename", type=str, - default="results.csv", + default="results.jsonl", help="Overrides the basename of output files.", ) @@ -411,11 +423,18 @@ def main(): args.filter = args.filter or [r"."] args.exclude = args.exclude or [r"^$"] + if args.log_level == "info": + log_level = logging.INFO + elif args.log_level == "warning": + log_level = logging.WARNING + else: + log_level = None + logging.basicConfig(level=log_level, force=True) + logger.info(args) runner = ExperimentRunner(args) runner.run() if __name__ == "__main__": - logging.basicConfig(level=logging.WARNING, force=True) main() diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index c5d34aef849..87d12a02a0f 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -71,9 +71,6 @@ def list_model_configs(self): return model_configs - def is_compatible(self, model_config, experiment_config): - return True - class TorchBenchModel(BenchmarkModel): From deb14825026cbdea0fb64dd90a2c8182acb96f4c Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Fri, 10 Feb 2023 06:17:50 +0000 Subject: [PATCH 24/31] Add accelerator model name --- benchmarks/benchmark_experiment.py | 8 +++--- benchmarks/experiment_runner.py | 41 +++++++++++++----------------- benchmarks/torchbench_model.py | 5 ++-- benchmarks/util.py | 30 ++++++++++++++++++++++ 4 files changed, 54 insertions(+), 30 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 61cb08653c2..593e5d20cad 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -5,9 +5,9 @@ import torch._dynamo as dynamo try: - from .util import is_xla_device_available + from .util import is_xla_device_available, get_accelerator_model except ImportError: - from util import is_xla_device_available + from util import is_xla_device_available, get_accelerator_model try: import torch_xla.core.xla_model as xm @@ -149,6 +149,7 @@ def __init__(self, experiment_name, accelerator, xla, dynamo, test, self.dynamo = dynamo self.test = test self.batch_size = batch_size + self.accelerator_model = get_accelerator_model(self.accelerator) def get_device(self): if self.xla: @@ -164,12 +165,13 @@ def get_device(self): @property def filename_str(self): - return f"{self.accelerator}-{self.xla}-{self.dynamo}-{self.test}-{self.batch_size}" + return f"{self.accelerator}-{self.accelerator_model}-{self.xla}-{self.dynamo}-{self.test}-{self.batch_size}" def to_dict(self): d = OrderedDict() d["experiment_name"] = self.experiment_name d["accelerator"] = self.accelerator + d["accelerator_model"] = self.accelerator_model d["xla"] = self.xla d["dynamo"] = self.dynamo d["test"] = self.test diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 7363e2628dc..808c84186c4 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -80,8 +80,8 @@ def run(self): model_config_str = json.dumps(model_config) dummy_benchmark_experiment = self.experiment_loader.load_experiment( experiment_config, dummy=True) - dummy_benchmark_model = self.model_loader.load_model(model_config, - dummy_benchmark_experiment, dummy=True) + dummy_benchmark_model = self.model_loader.load_model( + model_config,
dummy_benchmark_experiment, dummy=True) experiment_config["process_env"] = process_env command = ([sys.executable] + sys.argv + [f"--experiment-config={experiment_config_str}"] + @@ -89,7 +89,8 @@ def run(self): if self._args.dry_run: logger.warning(f"Dry run with {command}") continue - if self.model_loader.is_compatible(dummy_benchmark_model, dummy_benchmark_experiment): + if self.model_loader.is_compatible(dummy_benchmark_model, + dummy_benchmark_experiment): try: completed_process = subprocess.run( command, @@ -101,16 +102,17 @@ def run(self): ) except subprocess.TimeoutExpired as e: logger.error("TIMEOUT") - self.record_failed_experiment(dummy_benchmark_model, - dummy_benchmark_experiment, e) + self.save_results(dummy_benchmark_experiment, + dummy_benchmark_model, {"error": e}, None) except subprocess.CalledProcessError as e: logger.error("ERROR") - self.record_failed_experiment(dummy_benchmark_model, - dummy_benchmark_experiment, e.stderr) + self.save_results(dummy_benchmark_experiment, + dummy_benchmark_model, {"error": e.stderr}, + None) except subprocess.SubprocessError as e: logger.error("ERROR") - self.record_failed_experiment(dummy_benchmark_model, - dummy_benchmark_experiment, e) + self.save_results(dummy_benchmark_experiment, + dummy_benchmark_model, {"error": e}, None) else: if self._args.print_subprocess: logger.info(completed_process.stdout) @@ -119,8 +121,8 @@ def run(self): else: e = "SKIP because of incompatible model and experiment configs." logger.warning(e) - self.record_failed_experiment(dummy_benchmark_model, - dummy_benchmark_experiment, e) + self.save_results(dummy_benchmark_experiment, dummy_benchmark_model, + {"error": e}, None) def run_single_experiment(self, experiment_config, model_config): benchmark_experiment = self.experiment_loader.load_experiment( @@ -133,7 +135,8 @@ def run_single_experiment(self, experiment_config, model_config): metrics = OrderedDict() outputs = [] for i in range(self._args.repeat): - run_metrics, output = self.timed_run(benchmark_experiment, benchmark_model) + run_metrics, output = self.timed_run(benchmark_experiment, + benchmark_model) output = move_to_device(output, 'cpu') outputs.append(output) for key, val in run_metrics.items(): @@ -163,17 +166,6 @@ def save_results(self, benchmark_experiment, benchmark_model, metrics, self.output_jsonl(results) - def record_failed_experiment(self, dummy_benchmark_model, dummy_benchmark_experiment, e): - results = OrderedDict() - results.update(dummy_benchmark_model.to_dict()) - results.update(dummy_benchmark_experiment.to_dict()) - results["repeat"] = self._args.repeat - results["iterations_per_run"] = self._args.iterations_per_run - - results["error"] = e - - self.output_jsonl(results) - def output_jsonl(self, obj, file_path=None): if not file_path: file_path = self.output_file @@ -250,7 +242,8 @@ def timed_run(self, benchmark_experiment, benchmark_model): t_end = time.perf_counter() metrics["total_time"] = t_end - t_start - metrics["per_iter_time"] = metrics["total_time"] / self._args.iterations_per_run + metrics[ + "per_iter_time"] = metrics["total_time"] / self._args.iterations_per_run if benchmark_experiment.xla: metrics["trace_per_iter_time"] = t_trace / self._args.iterations_per_run diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index 87d12a02a0f..f8ff64fa6d7 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -143,7 +143,6 @@ def compute_loss(self, pred): elif type(pred).__name__ == "SquashedNormal": return pred.mean.sum() 
elif isinstance(pred, dict): - return sum([reduce_to_scalar_loss(value) for value in pred.values()]) / len( - pred.keys() - ) + return sum([reduce_to_scalar_loss(value) for value in pred.values() + ]) / len(pred.keys()) raise NotImplementedError("Don't know how to reduce", type(pred)) diff --git a/benchmarks/util.py b/benchmarks/util.py index b18ccc559ee..59cdd96af45 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -7,11 +7,13 @@ from os.path import abspath import queue import random +import subprocess import torch import traceback try: import torch_xla.core.xla_model as xm + from torch_xla.distributed.cluster import ClusterResolver except ImportError: # ignore the error if torch_xla is not installed pass @@ -119,3 +121,31 @@ def set_cwd(path): yield finally: os.chdir(original_dir) + + +def get_accelerator_model(accelerator): + if accelerator == "cpu": + return get_cpu_name() + elif accelerator == "gpu": + return get_gpu_name() + elif accelerator == "tpu": + return get_tpu_name() + else: + raise NotImplementedError + + +def get_cpu_name(): + return subprocess.check_output( + ["lscpu"], + encoding='utf-8').split("Model name:")[1].split("\n")[0].strip() + + +def get_gpu_name(): + return subprocess.check_output( + ["nvidia-smi", "--query-gpu=gpu_name", "--format=csv"], + encoding='utf-8').split("\n")[1] + + +def get_tpu_name(): + return ClusterResolver.get_instance_metadata( + 'instance/attributes/accelerator-type') From 10c52a7a005930f511da17453f5c3454d03d3bb3 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Sat, 11 Feb 2023 01:51:24 +0000 Subject: [PATCH 25/31] add skipping finished experiments --- benchmarks/benchmark_experiment.py | 2 +- benchmarks/benchmark_model.py | 2 +- benchmarks/experiment_runner.py | 44 ++++++++++++++++++++++++------ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 593e5d20cad..741cae53571 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -165,7 +165,7 @@ def get_device(self): @property def filename_str(self): - return f"{self.accelerator}-{self.accelerator_model}-{self.xla}-{self.dynamo}-{self.test}-{self.batch_size}" + return "-".join(self.to_dict().values()) def to_dict(self): d = OrderedDict() diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 5be78fbd43e..c24948a8d34 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -155,7 +155,7 @@ def eval(self, inputs, collect_full_output=False): @property def filename_str(self): - return f"{self.suite_name}-{self.model_name}" + return "-".join(self.to_dict().values()) def to_dict(self): d = OrderedDict() diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 808c84186c4..c28d520ee23 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -62,8 +62,22 @@ def run(self): self.run_single_experiment(experiment_config, model_config) else: assert not self._args.experiment_config and not self._args.model_config + finished_experiments = set() if os.path.exists(self.output_file): - os.unlink(self.output_file) + if self._args.no_resume: + os.unlink(self.output_file) + else: + with open(self.output_file, mode="r", encoding="utf-8") as f: + jsonlines = f.read().splitlines() + for jsonline in jsonlines: + tmp = json.loads(jsonline) + if self._args.experiment_name == "run_all": + # the finished experiment batch_size may be altered by model set_up(), + # so the dummy experiment 
will not match it + tmp["experiment"]["batch_size"] = self._args.batch_size + finished_experiments.add("-".join( + str(item) for item in (list(tmp["model"].values()) + + list(tmp["experiment"].values())))) experiment_configs = self.experiment_loader.list_experiment_configs() model_configs = self.model_loader.list_model_configs() @@ -89,6 +103,12 @@ def run(self): if self._args.dry_run: logger.warning(f"Dry run with {command}") continue + if "-".join( + str(item) + for item in (list(dummy_benchmark_model.to_dict().values()) + + list(dummy_benchmark_experiment.to_dict().values()) + )) in finished_experiments: + continue if self.model_loader.is_compatible(dummy_benchmark_model, dummy_benchmark_experiment): try: @@ -103,7 +123,7 @@ def run(self): except subprocess.TimeoutExpired as e: logger.error("TIMEOUT") self.save_results(dummy_benchmark_experiment, - dummy_benchmark_model, {"error": e}, None) + dummy_benchmark_model, {"error": str(e)}, None) except subprocess.CalledProcessError as e: logger.error("ERROR") self.save_results(dummy_benchmark_experiment, @@ -112,7 +132,7 @@ def run(self): except subprocess.SubprocessError as e: logger.error("ERROR") self.save_results(dummy_benchmark_experiment, - dummy_benchmark_model, {"error": e}, None) + dummy_benchmark_model, {"error": str(e)}, None) else: if self._args.print_subprocess: logger.info(completed_process.stdout) @@ -122,7 +142,7 @@ def run(self): e = "SKIP because of incompatible model and experiment configs." logger.warning(e) self.save_results(dummy_benchmark_experiment, dummy_benchmark_model, - {"error": e}, None) + {"error": str(e)}, None) def run_single_experiment(self, experiment_config, model_config): benchmark_experiment = self.experiment_loader.load_experiment( @@ -156,12 +176,12 @@ def save_results(self, benchmark_experiment, benchmark_model, metrics, outputs_file_name = None results = OrderedDict() - results.update(benchmark_model.to_dict()) - results.update(benchmark_experiment.to_dict()) + results["model"] = benchmark_model.to_dict() + results["experiment"] = benchmark_experiment.to_dict() results["repeat"] = self._args.repeat results["iterations_per_run"] = self._args.iterations_per_run - results.update(metrics) + results["metrics"] = metrics results["outputs_file"] = outputs_file_name self.output_jsonl(results) @@ -378,7 +398,7 @@ def parse_args(args=None): parser.add_argument( "--save-output", action="store_true", - help="Whether to save the output to disk", + help="Whether to save the model output to disk", ) parser.add_argument( @@ -395,6 +415,14 @@ def parse_args(args=None): help="Overrides the basename of output files.", ) + parser.add_argument( + "--no-resume", + action="store_true", + help="""By default, the runner would skip the finished experiments that + exist in the output-basename file. 
If --no-resume is set, the previous + output-basename file will be deleted and all experiments will run""", + ) + parser.add_argument( "--experiment-config", type=str, From 3b3724c14e025d1cfb82ff7c2f5a710ab4b24596 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Tue, 14 Feb 2023 19:46:15 +0000 Subject: [PATCH 26/31] main process needs to remove PJRT_DEVICE env var that is automatically added --- benchmarks/benchmark_experiment.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 741cae53571..3f7ab5b79d8 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -102,6 +102,10 @@ def is_available(self, experiment_config): def _add_experiment_env(self, experiment_config): process_env = None + if experiment_config["xla"]: + # remove env vars that would interfere with subprocess settings + os.environ.pop("PJRT_DEVICE", None) + os.environ.pop("XRT_TPU_CONFIG", None) if experiment_config["xla"] == "PJRT": process_env = os.environ.copy() process_env["PJRT_DEVICE"] = experiment_config["accelerator"].upper() From 668f28976f5898fcaa6af49bcecacfdb4d79df5f Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Wed, 15 Feb 2023 01:02:28 +0000 Subject: [PATCH 27/31] Add a simple result analyzer --- benchmarks/experiment_runner.py | 2 + benchmarks/result_analyzer.py | 156 ++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 benchmarks/result_analyzer.py diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index c28d520ee23..34e2a4d1629 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -165,6 +165,8 @@ def run_single_experiment(self, experiment_config, model_config): metrics[key] = [] metrics[key].append(val) + # additional experiment metrics can be added here + self.save_results(benchmark_experiment, benchmark_model, metrics, outputs) def save_results(self, benchmark_experiment, benchmark_model, metrics, diff --git a/benchmarks/result_analyzer.py b/benchmarks/result_analyzer.py new file mode 100644 index 00000000000..6ab7061846a --- /dev/null +++ b/benchmarks/result_analyzer.py @@ -0,0 +1,156 @@ +import argparse +from collections import OrderedDict +import copy +import csv +import io +import json +import logging +import numpy as np +import os +import pandas as pd +import subprocess +import sys +import time +import torch +from tqdm import tqdm +logger = logging.getLogger(__name__) +class ResultAnalyzer: + def __init__(self, args): + self._args = args + self.timestamp = self._args.timestamp or time.time() + self.output_dir = os.path.abspath(self._args.output_dirname) + if not os.path.exists(self.output_dir): + raise ValueError("The output directory does not exist.") + self.output_file = os.path.join(self.output_dir, "metric_report.csv") + def run(self): + jsonl_files = [] + for file in os.listdir(self.output_dir): + if file.endswith(".jsonl"): + jsonl_files.append(os.path.join(self.output_dir, file)) + metric_df = pd.DataFrame({"timestamp": pd.Series(dtype="int"), + "suite_name": pd.Series(dtype="str"), + "model_name": pd.Series(dtype="str"), + "experiment_name": pd.Series(dtype="str"), + "accelerator": pd.Series(dtype="str"), + "accelerator_model": pd.Series(dtype="str"), + "xla": pd.Series(dtype="str"), + "dynamo": pd.Series(dtype="str"), + "test": pd.Series(dtype="str"), + "batch_size": pd.Series(dtype="int"), + "repeat": pd.Series(dtype="int"), + "iterations_per_run": pd.Series(dtype="int"),
"error_message": pd.Series(dtype="str"), + "median_total_time": pd.Series(dtype="float"), + "median_per_iter_time": pd.Series(dtype="float"), + "xla_median_trace_per_iter_time": pd.Series(dtype="float"), + "xla_compile_time": pd.Series(dtype="float"), + "dynamo_compile_time": pd.Series(dtype="float"), + "output_file": pd.Series(dtype="str"), + }) + for file in jsonl_files: + metric_df = self.extract_metrics(file, metric_df) + + # additional processing of the metric_df can be done here + + self.export_metric_report(metric_df) + + def extract_metrics(self, file, metric_df): + with open(file, mode="r", encoding="utf-8") as f: + jsonlines = f.read().splitlines() + + for jsonline in jsonlines: + tmp = json.loads(jsonline) + d = {"timestamp": self.timestamp, + "suite_name": tmp["model"]["suite_name"], + "model_name": tmp["model"]["model_name"], + "experiment_name": tmp["experiment"]["experiment_name"], + "accelerator": tmp["experiment"]["accelerator"], + "accelerator_model": tmp["experiment"]["accelerator_model"], + "xla": tmp["experiment"]["xla"], + "dynamo": tmp["experiment"]["dynamo"], + "test": tmp["experiment"]["test"], + "batch_size": tmp["experiment"]["batch_size"], + "repeat": tmp["repeat"], + "iterations_per_run": tmp["iterations_per_run"], + "error_message": tmp["metrics"].get("error", None), + "outputs_file": tmp["outputs_file"], + } + if "error" not in tmp["metrics"]: + total_time = np.asarray(tmp["metrics"]["total_time"], dtype="float") + d["median_total_time"] = np.median(total_time) + per_iter_time = np.asarray(tmp["metrics"]["per_iter_time"], dtype="float") + d["median_per_iter_time"] = np.median(per_iter_time) + if tmp["experiment"]["xla"]: + trace_per_iter_time = np.asarray(tmp["metrics"]["trace_per_iter_time"], dtype="float") + d["xla_median_trace_per_iter_time"] = np.median(trace_per_iter_time) + d["xla_compile_time"] = np.max(total_time) - np.median(total_time) + if tmp["experiment"]["dynamo"]: + d["dynamo_compile_time"] = np.max(total_time) - np.median(total_time) + + new_row = pd.Series(d) + new_row.fillna(value=np.nan, inplace=True) + metric_df = pd.concat([metric_df, new_row.to_frame().T], ignore_index=True) + + return metric_df + + def export_metric_report(self, metric_df): + with open(self.output_file, mode="w", encoding="utf-8") as f: + metric_df.to_csv(f) + +def parse_args(args=None): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--log-level", + default="warning", + choices=["info", "warning"], + help="Specify the logging level.", + ) + + parser.add_argument( + "--experiment-name", + default="run_all", + choices=["run_all"], + help="Experiment name to run.", + ) + + parser.add_argument( + "--output-dirname", + type=str, + default="./output/", + help="Overrides the directory to place output files.", + ) + + parser.add_argument( + "--timestamp", + type=int, + help="User provided timestamp. 
If not provided, get the timestamp in analyzer", + ) + + return parser.parse_args(args) + + +def main(): + args = parse_args() + + if args.log_level == "info": + log_level = logging.INFO + elif args.log_level == "warning": + log_level = logging.WARNING + else: + log_level = None + logging.basicConfig(level=log_level, force=True) + + logger.info(args) + analyzer = ResultAnalyzer(args) + analyzer.run() + + +if __name__ == "__main__": + main() \ No newline at end of file From 1e787a74eadbe429f2b159b6d9131bfba1fb4434 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Thu, 16 Feb 2023 23:59:03 +0000 Subject: [PATCH 28/31] Result analyzer save to database csv with historical data --- benchmarks/result_analyzer.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/benchmarks/result_analyzer.py b/benchmarks/result_analyzer.py index 6ab7061846a..e1849886b6a 100644 --- a/benchmarks/result_analyzer.py +++ b/benchmarks/result_analyzer.py @@ -27,6 +27,8 @@ def __init__(self, args): raise ValueError("The output directory does not exist.") self.output_file = os.path.join(self.output_dir, "metric_report.csv") + self.database = os.path.abspath(self._args.database) + def run(self): jsonl_files = [] for file in os.listdir(self.output_dir): @@ -51,7 +53,7 @@ def run(self): "xla_median_trace_per_iter_time": pd.Series(dtype="float"), "xla_compile_time": pd.Series(dtype="float"), "dynamo_compile_time": pd.Series(dtype="float"), - "output_file": pd.Series(dtype="str"), + "outputs_file": pd.Series(dtype="str"), }) for file in jsonl_files: metric_df = self.extract_metrics(file, metric_df) @@ -100,8 +102,12 @@ def extract_metrics(self, file, metric_df): return metric_df def export_metric_report(self, metric_df): - with open(self.output_file, mode="w", encoding="utf-8") as f: - metric_df.to_csv(f) + metric_df.to_csv(self.output_file, mode="w", encoding="utf-8", header=True, index=False) + + if not os.path.exists(self.database): + metric_df.to_csv(self.database, mode="w", encoding="utf-8", header=True, index=False) + else: + metric_df.to_csv(self.database, mode="a", encoding="utf-8", header=False, index=False) def parse_args(args=None): parser = argparse.ArgumentParser() @@ -127,6 +133,13 @@ def parse_args(args=None): help="Overrides the directory to place output files.", ) + parser.add_argument( + "--database", + type=str, + default="./output/database.csv", + help="Path to the database.", # for POC, database is a path to a csv file. 
+ ) + parser.add_argument( "--timestamp", type=int, From 80a3fd620230c4f54741705f94fc6b4fd36fb43b Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Wed, 15 Mar 2023 16:32:08 +0000 Subject: [PATCH 29/31] Handle detectron2 models --- benchmarks/benchmark_experiment.py | 2 +- benchmarks/benchmark_model.py | 42 ++++++++++++++++++------------ benchmarks/torchbench_model.py | 23 ++++++++++++++++ 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py index 3f7ab5b79d8..bd523acfb9e 100644 --- a/benchmarks/benchmark_experiment.py +++ b/benchmarks/benchmark_experiment.py @@ -77,7 +77,7 @@ def list_experiment_configs(self): def is_available(self, experiment_config): if experiment_config["dynamo"] and experiment_config[ - "dynamo"] not in dynamo.list_backends(): + "dynamo"] not in dynamo.list_backends(exclude_tags=()): return False if experiment_config["dynamo"] == "inductor" and not ( experiment_config["accelerator"] == "gpu" and diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index c24948a8d34..2251303ee31 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -91,26 +91,34 @@ def set_up(self): 32),) self.optimizer_class = torch.optim.Adam + def _prepare_for_eval(self): + self.module.eval() + self.model_iter_fn = self.eval + + def _prepare_for_train(self): + self.module.train() + self.model_iter_fn = self.train + if self.benchmark_experiment.dynamo == "aot_torchxla_trace_once": + # TODO: dynamo aot_torchxla_trace_once would fail if there is an + # optimizer. + # This makes the aot_torchxla_trace_once results not comparable + # with other training results + self.optimizer = None + else: + if not hasattr(self, "optimizer"): + # For some special models, self.set_up() may have initialized an + # optimizer to use. So only initialize it when there is none existing. + self.optimizer = self.optimizer_class(self.module.parameters(), lr=0.01) + def prepare_for_experiment(self): self.device = self.benchmark_experiment.get_device() self.module = self.module.to(self.device) self.example_inputs = move_to_device(self.example_inputs, self.device) if self.benchmark_experiment.test == "eval": - self.module.eval() - self.model_iter_fn = self.eval - self.optimizer = None + self._prepare_for_eval() elif self.benchmark_experiment.test == "train": - self.module.train() - self.model_iter_fn = self.train - if self.benchmark_experiment.dynamo == "aot_torchxla_trace_once": - # TODO: dynamo aot_torchxla_trace_once would fail if there is an - # optimizer. 
- # This makes the aot_torchxla_trace_once results not comparable - # with other training results - self.optimizer = None - else: - self.optimizer = self.optimizer_class(self.module.parameters(), lr=0.01) + self._prepare_for_train() else: raise NotImplementedError @@ -124,13 +132,13 @@ def pick_grad(self): elif self.benchmark_experiment.test == "train": return torch.enable_grad() - def optimizer_zero_grad(self): + def _optimizer_zero_grad(self): if self.optimizer is not None: self.optimizer.zero_grad(True) else: self.module.zero_grad(True) - def optimizer_step(self): + def _optimizer_step(self): if self.optimizer is not None: self.optimizer.step() @@ -138,11 +146,11 @@ def compute_loss(self, pred): raise NotImplementedError def train(self, inputs, collect_full_output=False): - self.optimizer_zero_grad() + self._optimizer_zero_grad() pred = self.module(*inputs) loss = self.compute_loss(pred) loss.backward() - self.optimizer_step() + self._optimizer_step() if collect_full_output: return collect_results(self.module, pred, loss, inputs) # return loss.detach() diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index f8ff64fa6d7..06bef5ca430 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -20,6 +20,19 @@ logger = logging.getLogger(__name__) +DETECTRON2_MODELS = { + "detectron2_fasterrcnn_r_101_c4", + "detectron2_fasterrcnn_r_101_dc5", + "detectron2_fasterrcnn_r_101_fpn", + "detectron2_fasterrcnn_r_50_c4", + "detectron2_fasterrcnn_r_50_dc5", + "detectron2_fasterrcnn_r_50_fpn", + "detectron2_maskrcnn_r_101_c4", + "detectron2_maskrcnn_r_101_fpn", + "detectron2_maskrcnn_r_50_fpn", +} + + class TorchBenchModelLoader(ModelLoader): def __init__(self, args): @@ -123,6 +136,8 @@ def set_up(self): if self.model_name == "yolov3": self.example_inputs = (torch.rand(self.benchmark_experiment.batch_size, 3, 384, 512),) + if self.benchmark_experiment.test == "train" and self.model_name in DETECTRON2_MODELS: + self.optimizer = benchmark.optimizer del benchmark gc.collect() @@ -146,3 +161,11 @@ def compute_loss(self, pred): return sum([reduce_to_scalar_loss(value) for value in pred.values() ]) / len(pred.keys()) raise NotImplementedError("Don't know how to reduce", type(pred)) + + def train(self, inputs, collect_full_output=False): + if self.model_name in DETECTRON2_MODELS: + from detectron2.utils.events import EventStorage + with EventStorage(): + super().train(inputs, collect_full_output=collect_full_output) + else: + super().train(inputs, collect_full_output=collect_full_output) From a756875a927cafc668df574a9cba22c879729fa3 Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Wed, 15 Mar 2023 23:56:22 +0000 Subject: [PATCH 30/31] minor update --- benchmarks/experiment_runner.py | 2 +- benchmarks/torchbench_model.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py index 34e2a4d1629..953092d89c3 100644 --- a/benchmarks/experiment_runner.py +++ b/benchmarks/experiment_runner.py @@ -114,7 +114,7 @@ def run(self): try: completed_process = subprocess.run( command, - timeout=60 * 20, + timeout=60 * 30, env=process_env, check=True, capture_output=True, diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index 06bef5ca430..6c96b844581 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -29,7 +29,10 @@ "detectron2_fasterrcnn_r_50_fpn", "detectron2_maskrcnn_r_101_c4", "detectron2_maskrcnn_r_101_fpn", + 
"detectron2_maskrcnn_r_50_c4", "detectron2_maskrcnn_r_50_fpn", + "detectron2_maskrcnn", + "detectron2_fcos_r_50_fpn", } From 307578c5bba5010d7d81825017dac81663440bcb Mon Sep 17 00:00:00 2001 From: Liyang Lu Date: Fri, 19 May 2023 23:54:21 +0000 Subject: [PATCH 31/31] add deny list --- benchmarks/benchmark_model.py | 4 ++-- benchmarks/torchbench_model.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_model.py b/benchmarks/benchmark_model.py index 2251303ee31..dec1dde017a 100644 --- a/benchmarks/benchmark_model.py +++ b/benchmarks/benchmark_model.py @@ -123,8 +123,8 @@ def prepare_for_experiment(self): raise NotImplementedError if self.benchmark_experiment.dynamo: - self.model_iter_fn = dynamo.optimize(self.benchmark_experiment.dynamo)( - self.model_iter_fn) + self.model_iter_fn = torch.compile(self.model_iter_fn, + backend=self.benchmark_experiment.dynamo) def pick_grad(self): if self.benchmark_experiment.test == "eval": diff --git a/benchmarks/torchbench_model.py b/benchmarks/torchbench_model.py index 6c96b844581..99f1ceab62b 100644 --- a/benchmarks/torchbench_model.py +++ b/benchmarks/torchbench_model.py @@ -35,6 +35,25 @@ "detectron2_fcos_r_50_fpn", } +# Skip the experiment of a model if any of the experiment configs in the list is fully matched +DENY_LIST = { + "doctr_det_predictor": [{"test": "train"},], # not implemented + "doctr_reco_predictor": [{"test": "train"},], # not implemented + "detectron2_fcos_r_50_fpn": [{"test": "train"},], # not implemented + # https://github.com/pytorch/torchdynamo/issues/145 + "fambench_xlmr": [{}], + "llama": [{"test": "train"},], # not implemented + "mobilenet_v2_quantized_qat": [{"test": "eval", "accelerator": "gpu"}, # not implemented + {"test": "eval", "accelerator": "tpu"},], # not implemented + "pyhpc_equation_of_state": [{"test": "train"},], # not implemented + "pyhpc_isoneutral_mixing": [{"test": "train"},], # not implemented + "pyhpc_turbulent_kinetic_energy": [{"test": "train"},], # not implemented + "pytorch_struct": [{"test": "eval"},], # not implemented + "resnet50_quantized_qat": [{"test": "eval", "accelerator": "gpu"}, # not implemented + {"test": "eval", "accelerator": "tpu"},], # not implemented + # https://github.com/pytorch/pytorch/issues/99438 + "vision_maskrcnn": [{}], +} class TorchBenchModelLoader(ModelLoader): @@ -87,6 +106,19 @@ def list_model_configs(self): return model_configs + def is_compatible(self, dummy_benchmark_model, benchmark_experiment): + if dummy_benchmark_model.model_name in DENY_LIST: + for deny_experiment_config in DENY_LIST[dummy_benchmark_model.model_name]: + matched = True + for k, v in deny_experiment_config.items(): + if getattr(benchmark_experiment, k) != v: + matched = False + break + if matched: + return False + + return True + class TorchBenchModel(BenchmarkModel): @@ -145,6 +177,16 @@ def set_up(self): del benchmark gc.collect() + def pick_grad(self): + # special case + if self.model_name in ("maml",): + return torch.enable_grad() + + if self.benchmark_experiment.test == "eval": + return torch.no_grad() + elif self.benchmark_experiment.test == "train": + return torch.enable_grad() + def compute_loss(self, pred): """Reduce the output of a model to get scalar loss""" if isinstance(pred, torch.Tensor):