From 9b0ff56525ac8437270db8a0837097dd862e3e11 Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath
Date: Thu, 15 Feb 2024 15:12:49 +0000
Subject: [PATCH] Add benchmark throughput script and runner

---
 neuralmagic/benchmarks/common.py              |  26 +++++
 .../benchmarks/configs/benchmark_serving.json |  32 +++---
 .../configs/benchmark_throughput.json         |  38 ++++++
 .../benchmarks/run_benchmark_serving.py       |  71 ++++++++++++
 .../benchmarks/run_benchmark_throughput.py    |  26 +++++
 neuralmagic/benchmarks/run_benchmarks.py      | 108 +++---------------
 .../scripts/backend_request_func.py           |   6 +-
 .../benchmarks/scripts/benchmark_serving.py   |   8 +-
 8 files changed, 201 insertions(+), 114 deletions(-)

diff --git a/neuralmagic/benchmarks/common.py b/neuralmagic/benchmarks/common.py
index e69de29bb2d1d..6608ef6b466c8 100644
--- a/neuralmagic/benchmarks/common.py
+++ b/neuralmagic/benchmarks/common.py
@@ -0,0 +1,26 @@
+import itertools
+from typing import NamedTuple, Iterable
+from neuralmagic.tools.call_cmd import call_cmd
+
+def download_datasets(config: NamedTuple) -> None:
+    "config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs"
+    # download all required datasets
+    for download_cmd in config.dataset_download_cmds:
+        download_cmd_as_list = list(
+            filter(lambda x: len(x) != 0, download_cmd.split(" "))
+        )
+        call_cmd(download_cmd_as_list, stdout=None, stderr=None)
+
+def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]:
+    "config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs"
+
+    kv = vars(config.script_args)
+    arg_lists = kv.values()
+    assert all(map(lambda le: isinstance(le, list), arg_lists))
+
+    keys = kv.keys()
+    for args in itertools.product(*arg_lists):
+        cla = []
+        for name, value in zip(keys, args):
+            cla.extend([f"--{name}", f"{value}"])
+        yield cla
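For review context (not part of the patch): a minimal sketch of what script_args_to_cla in common.py does. Every key under script_args maps to a list of candidate values, and one command line is produced per element of the cartesian product of those lists. The kv literal below is illustrative and stands in for vars(config.script_args) after a config JSON (see the configs that follow) has been loaded.

    from itertools import product

    # Stand-in for vars(config.script_args); keys and values are illustrative.
    kv = {"num-prompts": [20, 25], "request-rate": [5, 10], "best-of": [1]}

    for combo in product(*kv.values()):
        cla = []
        for name, value in zip(kv.keys(), combo):
            cla.extend([f"--{name}", f"{value}"])
        print(cla)
    # ['--num-prompts', '20', '--request-rate', '5', '--best-of', '1']
    # ['--num-prompts', '20', '--request-rate', '10', '--best-of', '1']
    # ... 4 combinations in total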
diff --git a/neuralmagic/benchmarks/configs/benchmark_serving.json b/neuralmagic/benchmarks/configs/benchmark_serving.json
index 6815b36ff856a..4bc1d4a7708d0 100644
--- a/neuralmagic/benchmarks/configs/benchmark_serving.json
+++ b/neuralmagic/benchmarks/configs/benchmark_serving.json
@@ -1,16 +1,20 @@
 {
-    "description" : "Benchmarking system test",
+    "configs" : [
+    {
+        "description" : "Benchmark vllm serving",
+
+        "dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"],
+
+        "models" : ["facebook/opt-125m"],
+
+        "script_name" : "benchmark_serving.py",
 
-    "dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"],
-
-    "models" : ["facebook/opt-125m"],
-
-    "script_name" : "benchmark_serving.py",
-
-    "script_args" : {
-        "num-prompts" : [20, 25],
-        "request-rate" : [5, 10],
-        "best-of" : [1],
-        "dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"]
-    }
-}
+        "script_args" : {
+            "num-prompts" : [20, 25],
+            "request-rate" : [5, 10],
+            "best-of" : [1],
+            "dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"]
+        }
+    }
+    ]
+}
\ No newline at end of file
diff --git a/neuralmagic/benchmarks/configs/benchmark_throughput.json b/neuralmagic/benchmarks/configs/benchmark_throughput.json
index e69de29bb2d1d..1ebc1f601e583 100644
--- a/neuralmagic/benchmarks/configs/benchmark_throughput.json
+++ b/neuralmagic/benchmarks/configs/benchmark_throughput.json
@@ -0,0 +1,38 @@
+{
+    "configs" : [ {
+        "description" : "Benchmark vllm engine throughput - with dataset",
+        "dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"],
+
+        "models" : ["facebook/opt-125m"],
+        "script_name" : "benchmark_throughput.py",
+
+        "script_args" : {
+            "backend" : ["vllm"],
+            "dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"],
+            "output-len" : [128],
+            "tensor-parallel-size" : [1],
+            "n" : [1],
+            "num-prompts" : [1000],
+            "seed" : [0],
+            "dtype": ["auto"]
+        }
+    },
+    {
+        "description" : "Benchmark vllm engine throughput - synthetic",
+
+        "dataset_download_cmds" : [],
+        "models" : ["facebook/opt-125m"],
+        "script_name" : "benchmark_throughput.py",
+
+        "script_args" : {
+            "backend" : ["vllm"],
+            "input-len" : [1, 16, 32, 64, 128, 256, 512, 1024],
+            "output-len" : [128],
+            "tensor-parallel-size" : [1],
+            "n" : [1],
+            "num-prompts" : [1000],
+            "seed" : [0],
+            "dtype": ["auto"]
+        }
+    }]
+}
\ No newline at end of file
diff --git a/neuralmagic/benchmarks/run_benchmark_serving.py b/neuralmagic/benchmarks/run_benchmark_serving.py
index e69de29bb2d1d..7437d21870668 100644
--- a/neuralmagic/benchmarks/run_benchmark_serving.py
+++ b/neuralmagic/benchmarks/run_benchmark_serving.py
@@ -0,0 +1,71 @@
+import subprocess
+import time
+import socket
+
+from typing import NamedTuple
+from pathlib import Path
+
+from neuralmagic.tools.call_cmd import call_cmd
+from common import download_datasets, script_args_to_cla
+
+BENCH_SERVER_HOST = "localhost"
+BENCH_SERVER_PORT = 9000
+
+def get_this_script_dir() -> Path:
+    return Path(__file__).parent.resolve()
+
+def is_server_running(host: str, port: int, timeout=20) -> bool:
+    def try_connection() -> bool:
+        try:
+            sock = socket.create_connection((host, port))
+            sock.close()
+            return True
+        except Exception as _:
+            return False
+
+    retries = 5
+    timeout_part = timeout / retries
+    while retries:
+        time.sleep(timeout_part)
+        if try_connection():
+            return True
+        retries = retries - 1
+
+    return False
+
+def run_benchmark_serving_script(config: NamedTuple, output_directory: Path) -> None:
+
+    def run_bench(server_cmd: str, bench_cmd: list[str]) -> None:
+        try:
+            # start server
+            server_process = subprocess.Popen("exec " + server_cmd, shell=True)
+            if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
+                raise ValueError(
+                    f"Aborting bench run with server-cmd {server_cmd}, bench-cmd {bench_cmd}. Reason: Cannot start server"
+                )
+            # run bench
+            call_cmd(bench_cmd, stdout=None, stderr=None)
+        finally:
+            # kill the server
+            assert server_process is not None
+            server_process.kill()
+
+    # Process config.dataset_download_cmds
+    download_datasets(config)
+
+    script_path = get_this_script_dir() / f"scripts/{config.script_name}"
+
+    for model in config.models:
+        server_cmd = f"python3 -m vllm.entrypoints.api_server --model {model} --tokenizer {model} --host {BENCH_SERVER_HOST} --port {BENCH_SERVER_PORT} --disable-log-requests"
+
+        for script_args in script_args_to_cla(config):
+            bench_cmd = (
+                ["python3", f"{script_path}"]
+                + script_args
+                + ["--save-directory", f"{output_directory}"]
+                + ["--model", f"{model}"]
+                + ["--tokenizer", f"{model}"]
+                + ["--port", f"{BENCH_SERVER_PORT}"]
+                + ["--host", f"{BENCH_SERVER_HOST}"]
+            )
+            run_bench(server_cmd, bench_cmd)
diff --git a/neuralmagic/benchmarks/run_benchmark_throughput.py b/neuralmagic/benchmarks/run_benchmark_throughput.py
index e69de29bb2d1d..7af54574a0964 100644
--- a/neuralmagic/benchmarks/run_benchmark_throughput.py
+++ b/neuralmagic/benchmarks/run_benchmark_throughput.py
@@ -0,0 +1,26 @@
+from pathlib import Path
+from typing import NamedTuple
+
+from neuralmagic.tools.call_cmd import call_cmd
+from common import download_datasets, script_args_to_cla
+
+def get_this_script_dir() -> Path:
+    return Path(__file__).parent.resolve()
+
+def run_benchmark_throughput_script(config: NamedTuple, output_directory: Path) -> None:
+
+    # Process config.dataset_download_cmds
+    #download_datasets(config)
+
+    script_path = get_this_script_dir() / f"scripts/{config.script_name}"
+
+    for model in config.models:
+        for script_args in script_args_to_cla(config):
+            bench_cmd = (
+                ["python3", f"{script_path}"]
+                + script_args
+                + ["--save-directory", f"{output_directory}"]
+                + ["--model", f"{model}"]
+                + ["--tokenizer", f"{model}"]
+            )
+            call_cmd(bench_cmd, stdout=None, stderr=None)
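For illustration only (not part of the patch): with the synthetic throughput config above, one of the command lines assembled by run_benchmark_throughput_script and handed to call_cmd looks roughly like the list below. The script path and output directory are placeholders for the resolved values; everything else comes straight from the config JSON.

    # One expanded bench_cmd for the synthetic config (here input-len == 1).
    bench_cmd = [
        "python3", "<repo>/neuralmagic/benchmarks/scripts/benchmark_throughput.py",
        "--backend", "vllm",
        "--input-len", "1",
        "--output-len", "128",
        "--tensor-parallel-size", "1",
        "--n", "1",
        "--num-prompts", "1000",
        "--seed", "0",
        "--dtype", "auto",
        "--save-directory", "<output_directory>",
        "--model", "facebook/opt-125m",
        "--tokenizer", "facebook/opt-125m",
    ]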
diff --git a/neuralmagic/benchmarks/run_benchmarks.py b/neuralmagic/benchmarks/run_benchmarks.py
index 6b25c67930c1e..5abd05643bc0c 100644
--- a/neuralmagic/benchmarks/run_benchmarks.py
+++ b/neuralmagic/benchmarks/run_benchmarks.py
@@ -1,112 +1,30 @@
 import argparse
 import json
-import itertools
-import subprocess
-import time
-import socket
 
 from argparse import Namespace
 from pathlib import Path
-from typing import NamedTuple, Iterable
-
-from neuralmagic.tools.call_cmd import call_cmd
-
-BENCH_SERVER_HOST = "localhost"
-BENCH_SERVER_PORT = 9000
-
-
-def get_this_script_dir() -> Path:
-    return Path(__file__).parent.resolve()
-
-
-def is_server_running(host: str, port: int, timeout=20) -> bool:
-    def try_connection() -> bool:
-        try:
-            sock = socket.create_connection((host, port))
-            sock.close()
-            return True
-        except Exception as e:
-            return False
-
-    retries = 5
-    timeout_part = timeout / retries
-    while retries:
-        time.sleep(timeout_part)
-        if try_connection():
-            return True
-        retries = retries - 1
-
-    return False
-
-
-def run_bench(server_cmd: str, bench_cmd: list[str]) -> None:
-    try:
-        # start server
-        server_process = subprocess.Popen("exec " + server_cmd, shell=True)
-        if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
-            raise ValueError(
-                f"Aborting bench run with : server-cmd {server_cmd} , bench-cmd {bench_cmd}. Reason: Cannot start Server"
-            )
-        # run bench
-        call_cmd(bench_cmd, stdout=None, stderr=None)
-    finally:
-        # kill the server
-        assert server_process is not None
-        server_process.kill()
-
-
-def script_args_to_cla(kv: dict) -> Iterable[list[str]]:
-    # Input kv is a dict of lists. The idea is to provide command line args that is a cartesian product of these lists
-    arg_lists = kv.values()
-    assert all(map(lambda le: isinstance(le, list), arg_lists))
-
-    keys = kv.keys()
-    for args in itertools.product(*arg_lists):
-        cla = []
-        for name, value in zip(keys, args):
-            cla.extend([f"--{name}", f"{value}"])
-        yield cla
-
-
-def run_benchmark_serving_script(config: NamedTuple, output_directory: Path) -> None:
-    # download all required datasets
-    for download_cmd in config.dataset_download_cmds:
-        download_cmd_as_list = list(
-            filter(lambda x: len(x) != 0, download_cmd.split(" "))
-        )
-        call_cmd(download_cmd_as_list, stdout=None, stderr=None)
-
-    script_path = get_this_script_dir() / f"scripts/{config.script_name}"
-    script_args_kv = vars(config.script_args)
-
-    for model in config.models:
-        server_cmd = f"python3 -m vllm.entrypoints.api_server --model {model} --tokenizer {model} --host {BENCH_SERVER_HOST} --port {BENCH_SERVER_PORT}"
-        for script_args in script_args_to_cla(script_args_kv):
-            bench_cmd = (
-                ["python3", f"{script_path}"]
-                + script_args
-                + ["--save-directory", f"{output_directory}"]
-                + ["--model", f"{model}"]
-                + ["--tokenizer", f"{model}"]
-                + ["--port", f"{BENCH_SERVER_PORT}"]
-                + ["--host", f"{BENCH_SERVER_HOST}"]
-            )
-            run_bench(server_cmd, bench_cmd)
+from run_benchmark_serving import run_benchmark_serving_script
+from run_benchmark_throughput import run_benchmark_throughput_script
 
 
 def run(config_file_path: Path, output_directory: Path) -> None:
     assert config_file_path.exists()
 
-    config = None
+    configs = None
     with open(config_file_path, "r") as f:
-        config = json.load(f, object_hook=lambda d: Namespace(**d))
-    assert config is not None
+        configs = json.load(f, object_hook=lambda d: Namespace(**d))
+    assert configs is not None
 
-    if config.script_name == "benchmark_serving.py":
-        return run_benchmark_serving_script(config, output_directory)
+    for config in configs.configs:
+        if config.script_name == "benchmark_serving.py":
+            run_benchmark_serving_script(config, output_directory)
+            continue
 
-    raise ValueError(f"Unhandled benchmark script f{config.script_name}")
+        if config.script_name == "benchmark_throughput.py":
+            run_benchmark_throughput_script(config, output_directory)
+            continue
+
+        raise ValueError(f"Unhandled benchmark script {config.script_name}")
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
diff --git a/neuralmagic/benchmarks/scripts/backend_request_func.py b/neuralmagic/benchmarks/scripts/backend_request_func.py
index a15e5f511fd8b..ef4ccc77b0a2c 100644
--- a/neuralmagic/benchmarks/scripts/backend_request_func.py
+++ b/neuralmagic/benchmarks/scripts/backend_request_func.py
@@ -1,4 +1,8 @@
-## TODO (varun) this file is copied from upstream main. Figure out what to do before landing
+"""
+Benchmark serving utilities for various endpoints.
+
+NOTE: This script is copied from the upstream vllm repo (February 13th, 2024).
+"""
 
 import json
 import os
+""" import json import os diff --git a/neuralmagic/benchmarks/scripts/benchmark_serving.py b/neuralmagic/benchmarks/scripts/benchmark_serving.py index b23d70ce9d065..c9b6902225142 100644 --- a/neuralmagic/benchmarks/scripts/benchmark_serving.py +++ b/neuralmagic/benchmarks/scripts/benchmark_serving.py @@ -1,5 +1,3 @@ -# TODO (varun) : This files is copied from upstream main - figure out what to do before landing - """Benchmark online serving throughput. On the server side, run one of the following commands: @@ -16,6 +14,8 @@ --backend \ --tokenizer --dataset \ --request-rate + +NOTE: This script is copied from upstream vllm repo (february 13th, 2024). """ import argparse import asyncio @@ -297,7 +297,7 @@ def main(args: argparse.Namespace): ) # Save config and results to json - save_result = len(args.save_directory) != 0 + save_result = args.save_directory is not None if save_result: result_json = {} @@ -409,7 +409,7 @@ def main(args: argparse.Namespace): ) parser.add_argument( - "--save-directory", type=str, help="Output directory to store result file" + "--save-directory", type=str, default=None, help="Output directory to store result file" ) args = parser.parse_args()