
Commit

Add benchmark throughput script and runner
Varun Sundar Rabindranath committed Feb 15, 2024
1 parent 245e011 commit 9b0ff56
Showing 8 changed files with 201 additions and 114 deletions.
26 changes: 26 additions & 0 deletions neuralmagic/benchmarks/common.py
@@ -0,0 +1,26 @@
import itertools
from typing import NamedTuple, Iterable
from neuralmagic.tools.call_cmd import call_cmd

def download_datasets(config:NamedTuple) -> None:
    "config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs"
    # download all required datasets
    for download_cmd in config.dataset_download_cmds:
        download_cmd_as_list = list(
            filter(lambda x: len(x) != 0, download_cmd.split(" "))
        )
        call_cmd(download_cmd_as_list, stdout=None, stderr=None)

def script_args_to_cla(config:NamedTuple) -> Iterable[list[str]]:
    "config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs"

    kv = vars(config.script_args)
    arg_lists = kv.values()
    assert all(map(lambda le: isinstance(le, list), arg_lists))

    keys = kv.keys()
    for args in itertools.product(*arg_lists):
        cla = []
        for name, value in zip(keys, args):
            cla.extend([f"--{name}", f"{value}"])
        yield cla
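
For reference, a minimal sketch (not part of this commit) of how script_args_to_cla expands a config's script_args into command-line flag lists. It assumes it is run from neuralmagic/benchmarks (the runners import common the same way); the inline JSON is an illustrative stand-in for the files under neuralmagic/benchmarks/configs, loaded with the same object_hook trick run_benchmarks.py uses:

import json
from argparse import Namespace

from common import script_args_to_cla

# Illustrative config fragment; real configs also carry models, script_name, etc.
config = json.loads(
    """
    {
        "script_args" : {
            "num-prompts" : [20, 25],
            "request-rate" : [5, 10]
        }
    }
    """,
    object_hook=lambda d: Namespace(**d),
)

# Yields one flag list per element of the cartesian product of the value lists:
#   ['--num-prompts', '20', '--request-rate', '5']
#   ['--num-prompts', '20', '--request-rate', '10']
#   ['--num-prompts', '25', '--request-rate', '5']
#   ['--num-prompts', '25', '--request-rate', '10']
for cla in script_args_to_cla(config):
    print(cla)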
32 changes: 18 additions & 14 deletions neuralmagic/benchmarks/configs/benchmark_serving.json
@@ -1,16 +1,20 @@
{
    "description" : "Benchmarking system test",
    "configs" : [
        {
            "description" : "Benchmark vllm serving",

            "dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"],

            "models" : ["facebook/opt-125m"],

            "script_name" : "benchmark_serving.py",

            "script_args" : {
                "num-prompts" : [20, 25],
                "request-rate" : [5, 10],
                "best-of" : [1],
                "dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"]
            }
        }
    ]
}
38 changes: 38 additions & 0 deletions neuralmagic/benchmarks/configs/benchmark_throughput.json
@@ -0,0 +1,38 @@
{
    "configs" : [
        {
            "description" : "Benchmark vllm engine throughput - with dataset",
            "dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"],

            "models" : ["facebook/opt-125m"],
            "script_name" : "benchmark_throughput.py",

            "script_args" : {
                "backend" : ["vllm"],
                "dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"],
                "output-len" : [128],
                "tensor-parallel-size" : [1],
                "n" : [1],
                "num-prompts" : [1000],
                "seed" : [0],
                "dtype": ["auto"]
            }
        },
        {
            "description" : "Benchmark vllm engine throughput - synthetic",

            "dataset_download_cmds" : [],
            "models" : ["facebook/opt-125m"],
            "script_name" : "benchmark_throughput.py",

            "script_args" : {
                "backend" : ["vllm"],
                "input-len" : [1, 16, 32, 64, 128, 256, 512, 1024],
                "output-len" : [128],
                "tensor-parallel-size" : [1],
                "n" : [1],
                "num-prompts" : [1000],
                "seed" : [0],
                "dtype": ["auto"]
            }
        }
    ]
}
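
To make the sweep concrete: each script_args key becomes a repeated --flag and the value lists are crossed, so the synthetic config above yields eight runs, one per input-len. A sketch (not part of this commit) of one such command as assembled by the throughput runner added later in this commit; the output directory is a placeholder:

# One of the eight synthetic-sweep invocations (input-len = 32); the runner appends
# --save-directory, --model and --tokenizer on top of the flags from script_args.
bench_cmd = [
    "python3", "neuralmagic/benchmarks/scripts/benchmark_throughput.py",
    "--backend", "vllm",
    "--input-len", "32",
    "--output-len", "128",
    "--tensor-parallel-size", "1",
    "--n", "1",
    "--num-prompts", "1000",
    "--seed", "0",
    "--dtype", "auto",
    "--save-directory", "./benchmark-results",  # placeholder output directory
    "--model", "facebook/opt-125m",
    "--tokenizer", "facebook/opt-125m",
]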
71 changes: 71 additions & 0 deletions neuralmagic/benchmarks/run_benchmark_serving.py
@@ -0,0 +1,71 @@
import subprocess
import time
import socket

from typing import NamedTuple
from pathlib import Path

from neuralmagic.tools.call_cmd import call_cmd
from common import download_datasets, script_args_to_cla

BENCH_SERVER_HOST = "localhost"
BENCH_SERVER_PORT = 9000

def get_this_script_dir() -> Path:
    return Path(__file__).parent.resolve()

def is_server_running(host: str, port: int, timeout=20) -> bool:
    def try_connection() -> bool:
        try:
            sock = socket.create_connection((host, port))
            sock.close()
            return True
        except Exception as _:
            return False

    retries = 5
    timeout_part = timeout / retries
    while retries:
        time.sleep(timeout_part)
        if try_connection():
            return True
        retries = retries - 1

    return False

def run_benchmark_serving_script(config: NamedTuple, output_directory: Path) -> None:

    def run_bench(server_cmd: str, bench_cmd: list[str]) -> None:
        try:
            # start server
            server_process = subprocess.Popen("exec " + server_cmd, shell=True)
            if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
                raise ValueError(
                    f"Aborting bench run with : server-cmd {server_cmd} , bench-cmd {bench_cmd}. Reason: Cannot start Server"
                )
            # run bench
            call_cmd(bench_cmd, stdout=None, stderr=None)
        finally:
            # kill the server
            assert server_process is not None
            server_process.kill()

    # Process config.download_dataset_cmds
    download_datasets(config)

    script_path = get_this_script_dir() / f"scripts/{config.script_name}"

    for model in config.models:
        server_cmd = f"python3 -m vllm.entrypoints.api_server --model {model} --tokenizer {model} --host {BENCH_SERVER_HOST} --port {BENCH_SERVER_PORT} --disable-log-requests"

        for script_args in script_args_to_cla(config):
            bench_cmd = (
                ["python3", f"{script_path}"]
                + script_args
                + ["--save-directory", f"{output_directory}"]
                + ["--model", f"{model}"]
                + ["--tokenizer", f"{model}"]
                + ["--port", f"{BENCH_SERVER_PORT}"]
                + ["--host", f"{BENCH_SERVER_HOST}"]
            )
            run_bench(server_cmd, bench_cmd)
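
A minimal sketch (not part of this commit) of driving this runner directly against the serving config added above, assuming the working directory is neuralmagic/benchmarks; the results directory is a placeholder:

import json
from argparse import Namespace
from pathlib import Path

from run_benchmark_serving import run_benchmark_serving_script

with open(Path("configs/benchmark_serving.json"), "r") as f:
    configs = json.load(f, object_hook=lambda d: Namespace(**d))

for config in configs.configs:
    # Downloads the ShareGPT dataset, starts the vllm api_server on localhost:9000,
    # waits for it to accept connections, then runs benchmark_serving.py once per
    # script_args combination (2 num-prompts x 2 request-rates = 4 runs here).
    run_benchmark_serving_script(config, Path("./benchmark-results"))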
26 changes: 26 additions & 0 deletions neuralmagic/benchmarks/run_benchmark_throughput.py
@@ -0,0 +1,26 @@
from pathlib import Path
from typing import NamedTuple

from neuralmagic.tools.call_cmd import call_cmd
from common import download_datasets, script_args_to_cla

def get_this_script_dir() -> Path:
    return Path(__file__).parent.resolve()

def run_benchmark_throughput_script(config:NamedTuple, output_directory:Path) -> None:

    # Process config.download_dataset_cmds
    #download_datasets(config)

    script_path = get_this_script_dir() / f"scripts/{config.script_name}"

    for model in config.models:
        for script_args in script_args_to_cla(config):
            bench_cmd = (
                ["python3", f"{script_path}"]
                + script_args
                + ["--save-directory", f"{output_directory}"]
                + ["--model", f"{model}"]
                + ["--tokenizer", f"{model}"]
            )
            call_cmd(bench_cmd, stdout=None, stderr=None)
108 changes: 13 additions & 95 deletions neuralmagic/benchmarks/run_benchmarks.py
@@ -1,112 +1,30 @@
 import argparse
 import json
-import itertools
-import subprocess
-import time
-import socket

 from argparse import Namespace
 from pathlib import Path
-from typing import NamedTuple, Iterable
-
-from neuralmagic.tools.call_cmd import call_cmd
-
-BENCH_SERVER_HOST = "localhost"
-BENCH_SERVER_PORT = 9000
-
-
-def get_this_script_dir() -> Path:
-    return Path(__file__).parent.resolve()
-
-
-def is_server_running(host: str, port: int, timeout=20) -> bool:
-    def try_connection() -> bool:
-        try:
-            sock = socket.create_connection((host, port))
-            sock.close()
-            return True
-        except Exception as e:
-            return False
-
-    retries = 5
-    timeout_part = timeout / retries
-    while retries:
-        time.sleep(timeout_part)
-        if try_connection():
-            return True
-        retries = retries - 1
-
-    return False
-
-
-def run_bench(server_cmd: str, bench_cmd: list[str]) -> None:
-    try:
-        # start server
-        server_process = subprocess.Popen("exec " + server_cmd, shell=True)
-        if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
-            raise ValueError(
-                f"Aborting bench run with : server-cmd {server_cmd} , bench-cmd {bench_cmd}. Reason: Cannot start Server"
-            )
-        # run bench
-        call_cmd(bench_cmd, stdout=None, stderr=None)
-    finally:
-        # kill the server
-        assert server_process is not None
-        server_process.kill()
-
-
-def script_args_to_cla(kv: dict) -> Iterable[list[str]]:
-    # Input kv is a dict of lists. The idea is to provide command line args that is a cartesian product of these lists
-    arg_lists = kv.values()
-    assert all(map(lambda le: isinstance(le, list), arg_lists))
-
-    keys = kv.keys()
-    for args in itertools.product(*arg_lists):
-        cla = []
-        for name, value in zip(keys, args):
-            cla.extend([f"--{name}", f"{value}"])
-        yield cla
-
-
-def run_benchmark_serving_script(config: NamedTuple, output_directory: Path) -> None:
-    # download all required datasets
-    for download_cmd in config.dataset_download_cmds:
-        download_cmd_as_list = list(
-            filter(lambda x: len(x) != 0, download_cmd.split(" "))
-        )
-        call_cmd(download_cmd_as_list, stdout=None, stderr=None)
-
-    script_path = get_this_script_dir() / f"scripts/{config.script_name}"
-    script_args_kv = vars(config.script_args)
-
-    for model in config.models:
-        server_cmd = f"python3 -m vllm.entrypoints.api_server --model {model} --tokenizer {model} --host {BENCH_SERVER_HOST} --port {BENCH_SERVER_PORT}"
-        for script_args in script_args_to_cla(script_args_kv):
-            bench_cmd = (
-                ["python3", f"{script_path}"]
-                + script_args
-                + ["--save-directory", f"{output_directory}"]
-                + ["--model", f"{model}"]
-                + ["--tokenizer", f"{model}"]
-                + ["--port", f"{BENCH_SERVER_PORT}"]
-                + ["--host", f"{BENCH_SERVER_HOST}"]
-            )
-            run_bench(server_cmd, bench_cmd)
-
+from run_benchmark_serving import run_benchmark_serving_script
+from run_benchmark_throughput import run_benchmark_throughput_script

 def run(config_file_path: Path, output_directory: Path) -> None:
     assert config_file_path.exists()

-    config = None
+    configs = None
     with open(config_file_path, "r") as f:
-        config = json.load(f, object_hook=lambda d: Namespace(**d))
-    assert config is not None
+        configs = json.load(f, object_hook=lambda d: Namespace(**d))
+    assert configs is not None

-    if config.script_name == "benchmark_serving.py":
-        return run_benchmark_serving_script(config, output_directory)
+    for config in configs.configs:
+        if config.script_name == "benchmark_serving.py":
+            run_benchmark_serving_script(config, output_directory)
+            continue

-    raise ValueError(f"Unhandled benchmark script f{config.script_name}")
+        if config.script_name == "benchmark_throughput.py":
+            run_benchmark_throughput_script(config, output_directory)
+            continue
+
+        raise ValueError(f"Unhandled benchmark script f{config.script_name}")

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
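
For completeness, a sketch (not part of this commit) of invoking the dispatcher programmatically instead of via its command-line interface, assuming the working directory is neuralmagic/benchmarks; the output directory is a placeholder:

from pathlib import Path

from run_benchmarks import run

# Dispatches each entry in the config to the matching runner
# (benchmark_serving.py or benchmark_throughput.py).
run(Path("configs/benchmark_throughput.json"), Path("./benchmark-results"))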
6 changes: 5 additions & 1 deletion neuralmagic/benchmarks/scripts/backend_request_func.py
@@ -1,4 +1,8 @@
-## TODO (varun) this file is copied from upstream main. Figure out what to do before landing
+"""
+Benchmark serving utilities for various end-points.
+NOTE: This script is copied from upstream vllm repo (february 13th, 2024).
+"""

 import json
 import os
8 changes: 4 additions & 4 deletions neuralmagic/benchmarks/scripts/benchmark_serving.py
@@ -1,5 +1,3 @@
-# TODO (varun) : This files is copied from upstream main - figure out what to do before landing
-
 """Benchmark online serving throughput.
 On the server side, run one of the following commands:
@@ -16,6 +14,8 @@
         --backend <backend> \
         --tokenizer <your_model> --dataset <target_dataset> \
         --request-rate <request_rate>
+NOTE: This script is copied from upstream vllm repo (february 13th, 2024).
 """
 import argparse
 import asyncio
@@ -297,7 +297,7 @@ def main(args: argparse.Namespace):
     )

     # Save config and results to json
-    save_result = len(args.save_directory) != 0
+    save_result = args.save_directory is not None
     if save_result:
         result_json = {}

@@ -409,7 +409,7 @@ def main(args: argparse.Namespace):
     )

     parser.add_argument(
-        "--save-directory", type=str, help="Output directory to store result file"
+        "--save-directory", type=str, default=None, help="Output directory to store result file"
     )

     args = parser.parse_args()
