This repository has been archived by the owner on Oct 11, 2024. It is now read-only.
forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add benchmark throughput script and runner
- Loading branch information
Varun Sundar Rabindranath
committed
Feb 15, 2024
1 parent
245e011
commit 9b0ff56
Showing
8 changed files
with
201 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import itertools | ||
from typing import NamedTuple, Iterable | ||
from neuralmagic.tools.call_cmd import call_cmd | ||
|
||
def download_datasets(config: NamedTuple) -> None:
    """Run every dataset download command listed in the benchmark config.

    ``config`` is a NamedTuple constructed from some JSON in
    neuralmagic/benchmarks/configs. Each entry of
    ``config.dataset_download_cmds`` is a command string; it is split on
    single spaces, empty pieces (left by repeated spaces) are dropped, and
    the resulting argument list is handed to ``call_cmd``.
    """
    for command in config.dataset_download_cmds:
        argv = [piece for piece in command.split(" ") if piece]
        call_cmd(argv, stdout=None, stderr=None)
|
||
def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]:
    """Yield one command-line argument list per combination of script args.

    ``config`` is a NamedTuple constructed from some JSON in
    neuralmagic/benchmarks/configs. The attributes of ``config.script_args``
    (read through ``vars``) map an option name to a list of candidate values.
    Every element of the cartesian product of those lists is rendered as
    ``["--<name>", "<value>", ...]``, keeping the option order of
    ``config.script_args``.
    """
    options = vars(config.script_args)
    candidates = options.values()
    # Every option must provide a list of values to sweep over.
    assert all(isinstance(values, list) for values in candidates)

    names = options.keys()
    for combination in itertools.product(*candidates):
        flags = []
        for name, value in zip(names, combination):
            flags += [f"--{name}", f"{value}"]
        yield flags
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,20 @@ | ||
{ | ||
"description" : "Benchmarking system test", | ||
"configs" : [ | ||
{ | ||
"description" : "Benchmark vllm serving", | ||
|
||
"dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"], | ||
|
||
"models" : ["facebook/opt-125m"], | ||
|
||
"script_name" : "benchmark_serving.py", | ||
|
||
"dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"], | ||
|
||
"models" : ["facebook/opt-125m"], | ||
|
||
"script_name" : "benchmark_serving.py", | ||
|
||
"script_args" : { | ||
"num-prompts" : [20, 25], | ||
"request-rate" : [5, 10], | ||
"best-of" : [1], | ||
"dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"] | ||
} | ||
} | ||
"script_args" : { | ||
"num-prompts" : [20, 25], | ||
"request-rate" : [5, 10], | ||
"best-of" : [1], | ||
"dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"] | ||
} | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
{ | ||
"configs" :[ { | ||
"description" : "Benchmark vllm engine throughput - with dataset", | ||
"dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"], | ||
|
||
"models" : ["facebook/opt-125m"], | ||
"script_name" : "benchmark_throughput.py", | ||
|
||
"script_args" : { | ||
"backend" : ["vllm"], | ||
"dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"], | ||
"output-len" : [128], | ||
"tensor-parallel-size" : [1], | ||
"n" : [1], | ||
"num-prompts" : [1000], | ||
"seed" : [0], | ||
"dtype": ["auto"] | ||
} | ||
}, | ||
{ | ||
"description" : "Benchmark vllm engine throughput - synthetic", | ||
|
||
"dataset_download_cmds" : [], | ||
"models" : ["facebook/opt-125m"], | ||
"script_name" : "benchmark_throughput.py", | ||
|
||
"script_args" : { | ||
"backend" : ["vllm"], | ||
"input-len" : [1, 16, 32, 64, 128, 256, 512, 1024], | ||
"output-len" : [128], | ||
"tensor-parallel-size" : [1], | ||
"n" : [1], | ||
"num-prompts" : [1000], | ||
"seed" : [0], | ||
"dtype": ["auto"] | ||
} | ||
}] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import subprocess | ||
import time | ||
import socket | ||
|
||
from typing import NamedTuple | ||
from pathlib import Path | ||
|
||
from neuralmagic.tools.call_cmd import call_cmd | ||
from common import download_datasets, script_args_to_cla | ||
|
||
# Host and port the benchmark API server is launched on, polled at, and that
# the benchmark script is pointed to.
BENCH_SERVER_HOST = "localhost"
BENCH_SERVER_PORT = 9000
|
||
def get_this_script_dir() -> Path:
    """Return the resolved, absolute directory that contains this file."""
    this_file = Path(__file__)
    return this_file.parent.resolve()
|
||
def is_server_running(host: str, port: int, timeout: float = 20) -> bool:
    """Poll ``host:port`` until a TCP connection succeeds or attempts run out.

    The wait budget ``timeout`` (seconds) is split evenly over five attempts.
    Each attempt sleeps first, which gives a freshly launched server time to
    start listening, and then tries to open a connection.

    Args:
        host: host name or address to connect to.
        port: TCP port to connect to.
        timeout: total number of seconds spent sleeping between attempts.

    Returns:
        True as soon as a connection is established, False if every attempt
        fails.
    """
    attempts = 5
    pause = timeout / attempts
    # Bound each connect so an unresponsive host cannot stall the poll far
    # beyond the budget. Keep a floor: a socket timeout of 0 would mean
    # non-blocking mode, which makes connect fail immediately.
    connect_timeout = max(pause, 1.0)

    for _ in range(attempts):
        time.sleep(pause)
        try:
            with socket.create_connection((host, port), timeout=connect_timeout):
                return True
        except OSError:
            # Refused, unreachable or timed out: the server is not up yet.
            continue

    return False
|
||
def run_benchmark_serving_script(config: NamedTuple, output_directory: Path) -> None:
    """Run the serving benchmark script against a locally launched API server.

    Datasets are downloaded first. Then, for every model in ``config.models``
    and every argument combination produced by ``script_args_to_cla(config)``,
    a fresh ``vllm.entrypoints.api_server`` process is started on
    BENCH_SERVER_HOST:BENCH_SERVER_PORT, the benchmark script
    ``scripts/<config.script_name>`` is run against it, and the server is
    stopped again.

    Args:
        config: benchmark configuration; ``models`` and ``script_name`` are
            read here, and the whole object is passed to
            ``download_datasets`` and ``script_args_to_cla``.
        output_directory: forwarded to the benchmark script as
            ``--save-directory``.

    Raises:
        ValueError: if the server does not accept connections in time.
    """

    def run_bench(server_cmd: list[str], bench_cmd: list[str]) -> None:
        # Launch outside the try block: if Popen itself fails there is no
        # process to clean up and the original error must propagate as-is.
        # An argument list (no shell) starts the server process directly, so
        # kill() below reaches the server and not an intermediate shell.
        server_process = subprocess.Popen(server_cmd)
        try:
            if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
                server_cmd_text = " ".join(server_cmd)
                raise ValueError(
                    f"Aborting bench run with : server-cmd {server_cmd_text} , bench-cmd {bench_cmd}. Reason: Cannot start Server"
                )
            # run bench
            call_cmd(bench_cmd, stdout=None, stderr=None)
        finally:
            # Stop the server and reap it, so it is fully gone before the
            # next run starts another server on the same port.
            server_process.kill()
            server_process.wait()

    # Fetch the datasets named in config.dataset_download_cmds
    download_datasets(config)

    script_path = get_this_script_dir() / f"scripts/{config.script_name}"

    for model in config.models:
        server_cmd = [
            "python3",
            "-m",
            "vllm.entrypoints.api_server",
            "--model",
            f"{model}",
            "--tokenizer",
            f"{model}",
            "--host",
            f"{BENCH_SERVER_HOST}",
            "--port",
            f"{BENCH_SERVER_PORT}",
            "--disable-log-requests",
        ]

        for script_args in script_args_to_cla(config):
            bench_cmd = (
                ["python3", f"{script_path}"]
                + script_args
                + ["--save-directory", f"{output_directory}"]
                + ["--model", f"{model}"]
                + ["--tokenizer", f"{model}"]
                + ["--port", f"{BENCH_SERVER_PORT}"]
                + ["--host", f"{BENCH_SERVER_HOST}"]
            )
            run_bench(server_cmd, bench_cmd)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from pathlib import Path | ||
from typing import NamedTuple | ||
|
||
from neuralmagic.tools.call_cmd import call_cmd | ||
from common import download_datasets, script_args_to_cla | ||
|
||
def get_this_script_dir() -> Path:
    """Return the resolved, absolute directory that contains this file."""
    containing_dir = Path(__file__).parent
    return containing_dir.resolve()
|
||
def run_benchmark_throughput_script(config: NamedTuple, output_directory: Path) -> None:
    """Run the throughput benchmark script for every model/argument combination.

    Datasets are downloaded first. Then ``scripts/<config.script_name>`` is
    invoked once per model in ``config.models`` and per argument combination
    produced by ``script_args_to_cla(config)``.

    Args:
        config: benchmark configuration; ``models`` and ``script_name`` are
            read here, and the whole object is passed to
            ``download_datasets`` and ``script_args_to_cla``.
        output_directory: forwarded to the benchmark script as
            ``--save-directory``.
    """
    # Fetch the datasets named in config.dataset_download_cmds; configs may
    # reference the downloaded file through their "dataset" script argument.
    download_datasets(config)

    script_path = get_this_script_dir() / f"scripts/{config.script_name}"

    for model in config.models:
        for script_args in script_args_to_cla(config):
            bench_cmd = [
                "python3",
                f"{script_path}",
                *script_args,
                "--save-directory",
                f"{output_directory}",
                "--model",
                f"{model}",
                "--tokenizer",
                f"{model}",
            ]
            call_cmd(bench_cmd, stdout=None, stderr=None)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters