
Commit

Add benchmark throughput script and runner
Varun Sundar Rabindranath committed Feb 15, 2024
1 parent 245e011 commit 9b0ff56
Showing 8 changed files with 201 additions and 114 deletions.
26 changes: 26 additions & 0 deletions neuralmagic/benchmarks/common.py
@@ -0,0 +1,26 @@
import itertools
from typing import NamedTuple, Iterable
from neuralmagic.tools.call_cmd import call_cmd

def download_datasets(config:NamedTuple) -> None:
    "config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs"
    # download all required datasets
    for download_cmd in config.dataset_download_cmds:
        download_cmd_as_list = list(
            filter(lambda x: len(x) != 0, download_cmd.split(" "))
        )
        call_cmd(download_cmd_as_list, stdout=None, stderr=None)

def script_args_to_cla(config:NamedTuple) -> Iterable[list[str]]:
    "config is a NamedTuple constructed from some JSON in neuralmagic/benchmarks/configs"

    kv = vars(config.script_args)
    arg_lists = kv.values()
    assert all(map(lambda le: isinstance(le, list), arg_lists))

    keys = kv.keys()
    for args in itertools.product(*arg_lists):
        cla = []
        for name, value in zip(keys, args):
            cla.extend([f"--{name}", f"{value}"])
        yield cla
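
For reference, a minimal sketch (not part of this commit) of how script_args_to_cla expands a config's script_args into command-line flag lists. It assumes it is run from neuralmagic/benchmarks (the runners import common the same way); the inline JSON is an illustrative stand-in for the files under neuralmagic/benchmarks/configs, loaded with the same object_hook trick run_benchmarks.py uses:

import json
from argparse import Namespace

from common import script_args_to_cla

# Illustrative config fragment; real configs also carry models, script_name, etc.
config = json.loads(
    """
    {
        "script_args" : {
            "num-prompts" : [20, 25],
            "request-rate" : [5, 10]
        }
    }
    """,
    object_hook=lambda d: Namespace(**d),
)

# Yields one flag list per element of the cartesian product of the value lists:
#   ['--num-prompts', '20', '--request-rate', '5']
#   ['--num-prompts', '20', '--request-rate', '10']
#   ['--num-prompts', '25', '--request-rate', '5']
#   ['--num-prompts', '25', '--request-rate', '10']
for cla in script_args_to_cla(config):
    print(cla)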
32 changes: 18 additions & 14 deletions neuralmagic/benchmarks/configs/benchmark_serving.json
@@ -1,16 +1,20 @@
{
    "description" : "Benchmarking system test",
    "configs" : [
        {
            "description" : "Benchmark vllm serving",

            "dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"],

            "models" : ["facebook/opt-125m"],

            "script_name" : "benchmark_serving.py",

            "script_args" : {
                "num-prompts" : [20, 25],
                "request-rate" : [5, 10],
                "best-of" : [1],
                "dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"]
            }
        }
    ]
}
38 changes: 38 additions & 0 deletions neuralmagic/benchmarks/configs/benchmark_throughput.json
@@ -0,0 +1,38 @@
{
    "configs" : [
        {
            "description" : "Benchmark vllm engine throughput - with dataset",
            "dataset_download_cmds" : ["wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"],

            "models" : ["facebook/opt-125m"],
            "script_name" : "benchmark_throughput.py",

            "script_args" : {
                "backend" : ["vllm"],
                "dataset" : ["ShareGPT_V3_unfiltered_cleaned_split.json"],
                "output-len" : [128],
                "tensor-parallel-size" : [1],
                "n" : [1],
                "num-prompts" : [1000],
                "seed" : [0],
                "dtype": ["auto"]
            }
        },
        {
            "description" : "Benchmark vllm engine throughput - synthetic",

            "dataset_download_cmds" : [],
            "models" : ["facebook/opt-125m"],
            "script_name" : "benchmark_throughput.py",

            "script_args" : {
                "backend" : ["vllm"],
                "input-len" : [1, 16, 32, 64, 128, 256, 512, 1024],
                "output-len" : [128],
                "tensor-parallel-size" : [1],
                "n" : [1],
                "num-prompts" : [1000],
                "seed" : [0],
                "dtype": ["auto"]
            }
        }
    ]
}
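
To make the sweep concrete: each script_args key becomes a repeated --flag and the value lists are crossed, so the synthetic config above yields eight runs, one per input-len. A sketch (not part of this commit) of one such command as assembled by the throughput runner added later in this commit; the output directory is a placeholder:

# One of the eight synthetic-sweep invocations (input-len = 32); the runner appends
# --save-directory, --model and --tokenizer on top of the flags from script_args.
bench_cmd = [
    "python3", "neuralmagic/benchmarks/scripts/benchmark_throughput.py",
    "--backend", "vllm",
    "--input-len", "32",
    "--output-len", "128",
    "--tensor-parallel-size", "1",
    "--n", "1",
    "--num-prompts", "1000",
    "--seed", "0",
    "--dtype", "auto",
    "--save-directory", "./benchmark-results",  # placeholder output directory
    "--model", "facebook/opt-125m",
    "--tokenizer", "facebook/opt-125m",
]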
71 changes: 71 additions & 0 deletions neuralmagic/benchmarks/run_benchmark_serving.py
@@ -0,0 +1,71 @@
import subprocess
import time
import socket

from typing import NamedTuple
from pathlib import Path

from neuralmagic.tools.call_cmd import call_cmd
from common import download_datasets, script_args_to_cla

BENCH_SERVER_HOST = "localhost"
BENCH_SERVER_PORT = 9000

def get_this_script_dir() -> Path:
    return Path(__file__).parent.resolve()

def is_server_running(host: str, port: int, timeout=20) -> bool:
    def try_connection() -> bool:
        try:
            sock = socket.create_connection((host, port))
            sock.close()
            return True
        except Exception as _:
            return False

    retries = 5
    timeout_part = timeout / retries
    while retries:
        time.sleep(timeout_part)
        if try_connection():
            return True
        retries = retries - 1

    return False

def run_benchmark_serving_script(config: NamedTuple, output_directory: Path) -> None:

    def run_bench(server_cmd: str, bench_cmd: list[str]) -> None:
        try:
            # start server
            server_process = subprocess.Popen("exec " + server_cmd, shell=True)
            if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
                raise ValueError(
                    f"Aborting bench run with : server-cmd {server_cmd} , bench-cmd {bench_cmd}. Reason: Cannot start Server"
                )
            # run bench
            call_cmd(bench_cmd, stdout=None, stderr=None)
        finally:
            # kill the server
            assert server_process is not None
            server_process.kill()

    # Process config.download_dataset_cmds
    download_datasets(config)

    script_path = get_this_script_dir() / f"scripts/{config.script_name}"

    for model in config.models:
        server_cmd = f"python3 -m vllm.entrypoints.api_server --model {model} --tokenizer {model} --host {BENCH_SERVER_HOST} --port {BENCH_SERVER_PORT} --disable-log-requests"

        for script_args in script_args_to_cla(config):
            bench_cmd = (
                ["python3", f"{script_path}"]
                + script_args
                + ["--save-directory", f"{output_directory}"]
                + ["--model", f"{model}"]
                + ["--tokenizer", f"{model}"]
                + ["--port", f"{BENCH_SERVER_PORT}"]
                + ["--host", f"{BENCH_SERVER_HOST}"]
            )
            run_bench(server_cmd, bench_cmd)
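
A minimal sketch (not part of this commit) of driving this runner directly against the serving config added above, assuming the working directory is neuralmagic/benchmarks; the results directory is a placeholder:

import json
from argparse import Namespace
from pathlib import Path

from run_benchmark_serving import run_benchmark_serving_script

with open(Path("configs/benchmark_serving.json"), "r") as f:
    configs = json.load(f, object_hook=lambda d: Namespace(**d))

for config in configs.configs:
    # Downloads the ShareGPT dataset, starts the vllm api_server on localhost:9000,
    # waits for it to accept connections, then runs benchmark_serving.py once per
    # script_args combination (2 num-prompts x 2 request-rates = 4 runs here).
    run_benchmark_serving_script(config, Path("./benchmark-results"))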
26 changes: 26 additions & 0 deletions neuralmagic/benchmarks/run_benchmark_throughput.py
@@ -0,0 +1,26 @@
from pathlib import Path
from typing import NamedTuple

from neuralmagic.tools.call_cmd import call_cmd
from common import download_datasets, script_args_to_cla

def get_this_script_dir() -> Path:
    return Path(__file__).parent.resolve()

def run_benchmark_throughput_script(config:NamedTuple, output_directory:Path) -> None:

    # Process config.download_dataset_cmds
    #download_datasets(config)

    script_path = get_this_script_dir() / f"scripts/{config.script_name}"

    for model in config.models:
        for script_args in script_args_to_cla(config):
            bench_cmd = (
                ["python3", f"{script_path}"]
                + script_args
                + ["--save-directory", f"{output_directory}"]
                + ["--model", f"{model}"]
                + ["--tokenizer", f"{model}"]
            )
            call_cmd(bench_cmd, stdout=None, stderr=None)
108 changes: 13 additions & 95 deletions neuralmagic/benchmarks/run_benchmarks.py
@@ -1,112 +1,30 @@
 import argparse
 import json
-import itertools
-import subprocess
-import time
-import socket

 from argparse import Namespace
 from pathlib import Path
-from typing import NamedTuple, Iterable
-
-from neuralmagic.tools.call_cmd import call_cmd
-
-BENCH_SERVER_HOST = "localhost"
-BENCH_SERVER_PORT = 9000
-
-
-def get_this_script_dir() -> Path:
-    return Path(__file__).parent.resolve()
-
-
-def is_server_running(host: str, port: int, timeout=20) -> bool:
-    def try_connection() -> bool:
-        try:
-            sock = socket.create_connection((host, port))
-            sock.close()
-            return True
-        except Exception as e:
-            return False
-
-    retries = 5
-    timeout_part = timeout / retries
-    while retries:
-        time.sleep(timeout_part)
-        if try_connection():
-            return True
-        retries = retries - 1
-
-    return False
-
-
-def run_bench(server_cmd: str, bench_cmd: list[str]) -> None:
-    try:
-        # start server
-        server_process = subprocess.Popen("exec " + server_cmd, shell=True)
-        if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
-            raise ValueError(
-                f"Aborting bench run with : server-cmd {server_cmd} , bench-cmd {bench_cmd}. Reason: Cannot start Server"
-            )
-        # run bench
-        call_cmd(bench_cmd, stdout=None, stderr=None)
-    finally:
-        # kill the server
-        assert server_process is not None
-        server_process.kill()
-
-
-def script_args_to_cla(kv: dict) -> Iterable[list[str]]:
-    # Input kv is a dict of lists. The idea is to provide command line args that is a cartesian product of these lists
-    arg_lists = kv.values()
-    assert all(map(lambda le: isinstance(le, list), arg_lists))
-
-    keys = kv.keys()
-    for args in itertools.product(*arg_lists):
-        cla = []
-        for name, value in zip(keys, args):
-            cla.extend([f"--{name}", f"{value}"])
-        yield cla
-
-
-def run_benchmark_serving_script(config: NamedTuple, output_directory: Path) -> None:
-    # download all required datasets
-    for download_cmd in config.dataset_download_cmds:
-        download_cmd_as_list = list(
-            filter(lambda x: len(x) != 0, download_cmd.split(" "))
-        )
-        call_cmd(download_cmd_as_list, stdout=None, stderr=None)
-
-    script_path = get_this_script_dir() / f"scripts/{config.script_name}"
-    script_args_kv = vars(config.script_args)
-
-    for model in config.models:
-        server_cmd = f"python3 -m vllm.entrypoints.api_server --model {model} --tokenizer {model} --host {BENCH_SERVER_HOST} --port {BENCH_SERVER_PORT}"
-        for script_args in script_args_to_cla(script_args_kv):
-            bench_cmd = (
-                ["python3", f"{script_path}"]
-                + script_args
-                + ["--save-directory", f"{output_directory}"]
-                + ["--model", f"{model}"]
-                + ["--tokenizer", f"{model}"]
-                + ["--port", f"{BENCH_SERVER_PORT}"]
-                + ["--host", f"{BENCH_SERVER_HOST}"]
-            )
-            run_bench(server_cmd, bench_cmd)
-
+from run_benchmark_serving import run_benchmark_serving_script
+from run_benchmark_throughput import run_benchmark_throughput_script

 def run(config_file_path: Path, output_directory: Path) -> None:
     assert config_file_path.exists()

-    config = None
+    configs = None
     with open(config_file_path, "r") as f:
-        config = json.load(f, object_hook=lambda d: Namespace(**d))
-    assert config is not None
+        configs = json.load(f, object_hook=lambda d: Namespace(**d))
+    assert configs is not None

-    if config.script_name == "benchmark_serving.py":
-        return run_benchmark_serving_script(config, output_directory)
+    for config in configs.configs:
+        if config.script_name == "benchmark_serving.py":
+            run_benchmark_serving_script(config, output_directory)
+            continue

-    raise ValueError(f"Unhandled benchmark script f{config.script_name}")
+        if config.script_name == "benchmark_throughput.py":
+            run_benchmark_throughput_script(config, output_directory)
+            continue
+
+        raise ValueError(f"Unhandled benchmark script f{config.script_name}")

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
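
For completeness, a sketch (not part of this commit) of invoking the dispatcher programmatically instead of via its command-line interface, assuming the working directory is neuralmagic/benchmarks; the output directory is a placeholder:

from pathlib import Path

from run_benchmarks import run

# Dispatches each entry in the config to the matching runner
# (benchmark_serving.py or benchmark_throughput.py).
run(Path("configs/benchmark_throughput.json"), Path("./benchmark-results"))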
6 changes: 5 additions & 1 deletion neuralmagic/benchmarks/scripts/backend_request_func.py
@@ -1,4 +1,8 @@
-## TODO (varun) this file is copied from upstream main. Figure out what to do before landing
+"""
+Benchmark serving utilities for various end-points.
+NOTE: This script is copied from upstream vllm repo (february 13th, 2024).
+"""

 import json
 import os
8 changes: 4 additions & 4 deletions neuralmagic/benchmarks/scripts/benchmark_serving.py
@@ -1,5 +1,3 @@
-# TODO (varun) : This files is copied from upstream main - figure out what to do before landing
-
 """Benchmark online serving throughput.
 On the server side, run one of the following commands:
@@ -16,6 +14,8 @@
         --backend <backend> \
         --tokenizer <your_model> --dataset <target_dataset> \
         --request-rate <request_rate>
+NOTE: This script is copied from upstream vllm repo (february 13th, 2024).
 """
 import argparse
 import asyncio
@@ -297,7 +297,7 @@ def main(args: argparse.Namespace):
     )

     # Save config and results to json
-    save_result = len(args.save_directory) != 0
+    save_result = args.save_directory is not None
     if save_result:
         result_json = {}

@@ -409,7 +409,7 @@ def main(args: argparse.Namespace):
     )

     parser.add_argument(
-        "--save-directory", type=str, help="Output directory to store result file"
+        "--save-directory", type=str, default=None, help="Output directory to store result file"
     )

     args = parser.parse_args()
