Skip to content

Commit

Permalink
fixt
Browse files (browse the repository at this point in the history)
  • Loading branch information
robertgshaw2-neuralmagic committed Aug 28, 2024
1 parent d179dc2 commit c90927b
Showing 1 changed file with 1 addition and 7 deletions.
8 changes: 1 addition & 7 deletions benchmarks/benchmark_throughput.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -87,7 +87,6 @@ def run_vllm(
download_dir: Optional[str] = None,
load_format: str = EngineArgs.load_format,
disable_async_output_proc: bool = False,
- max_num_seqs: Optional[int] = None,
) -> float:
from vllm import LLM, SamplingParams
llm = LLM(
Expand Down Expand Up @@ -241,7 +240,7 @@ def main(args: argparse.Namespace):
args.max_num_batched_tokens, args.distributed_executor_backend,
args.gpu_memory_utilization, args.num_scheduler_steps,
args.use_v2_block_manager, args.download_dir, args.load_format,
- args.disable_async_output_proc, args.max_num_seqs)
+ args.disable_async_output_proc)
elif args.backend == "hf":
assert args.tensor_parallel_size == 1
elapsed_time = run_hf(requests, args.model, tokenizer, args.n,
Expand Down Expand Up @@ -399,11 +398,6 @@ def main(args: argparse.Namespace):
help='Backend to use for distributed serving. When more than 1 GPU '
'is used, will be automatically set to "ray" if installed '
'or "mp" (multiprocessing) otherwise.')
- parser.add_argument(
- '--max-num-seqs',
- type=int,
- default=None,
- help='Max num seqs.')
parser.add_argument(
'--load-format',
type=str,
Expand Down

0 comments on commit c90927b

Please sign in to comment.