This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit c2ea075: fix formatting
LucasWilkinson committed Jul 2, 2024
1 parent c387ce5 commit c2ea075
Showing 3 changed files with 24 additions and 23 deletions.
examples/offline_profile.py: 13 additions & 12 deletions
@@ -1,11 +1,12 @@
import argparse
import torch
import sys
import json
import inspect

from dataclasses import dataclass, asdict
import json
import sys
from dataclasses import asdict, dataclass
from typing import Optional

import torch

from vllm import LLM, SamplingParams
from vllm.profiler import nm_profile
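Note: this first hunk only reorders imports. The new layout follows isort-style grouping (standard library, then third-party, then first-party), which appears to be what the formatter enforced here. A minimal sketch of the resulting header, assuming isort defaults:

# Sketch of the import layout the hunk above produces (isort-style groups,
# separated by blank lines); behavior of the script is unchanged.
import argparse       # standard library, alphabetized
import inspect
import json
import sys
from dataclasses import asdict, dataclass
from typing import Optional

import torch          # third-party packages

from vllm import LLM, SamplingParams   # first-party (repo-local) packages
from vllm.profiler import nm_profile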

@@ -68,6 +69,7 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
output_len = context.output_len

scheduler_config = llm.llm_engine.scheduler_config
max_model_len = llm.llm_engine.model_config.max_model_len
max_num_batched_tokens = scheduler_config.max_num_batched_tokens
max_num_seqs = scheduler_config.max_num_seqs

@@ -89,9 +91,9 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
llm.llm_engine.model_config.max_model_len)
if prompt_len + output_len > llm.llm_engine.model_config.max_model_len:
print(
f"ERROR: chosen prompt_len + output_len ({prompt_len} + {output_len} = "
f"{prompt_len + output_len}) is larger than the model's max_model_len "
f"({llm.llm_engine.model_config.max_model_len}), please choose a smaller "
f"ERROR: chosen prompt_len + output_len ({prompt_len} + "
f"{output_len} = {prompt_len + output_len}) is larger than the "
f"model's max_model_len ({max_model_len}), please choose a smaller "
f"prompt_len or output_len, or increase --max-model-len")
sys.exit(-1)
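For reference, the hunk above only re-wraps the error message and reads max_model_len from the local variable introduced earlier in the diff. A minimal standalone sketch of the check, using illustrative values rather than ones from the commit:

max_model_len = 4096          # stands in for llm.llm_engine.model_config.max_model_len
prompt_len, output_len = 3800, 512

if prompt_len + output_len > max_model_len:
    # Same message the script prints; the real code then calls sys.exit(-1).
    print(f"ERROR: chosen prompt_len + output_len ({prompt_len} + "
          f"{output_len} = {prompt_len + output_len}) is larger than the "
          f"model's max_model_len ({max_model_len}), please choose a smaller "
          f"prompt_len or output_len, or increase --max-model-len")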

@@ -222,9 +224,8 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
type=str,
choices=['awq', 'gptq', 'squeezellm', 'marlin', 'smoothquant', None],
default=None,
help="The method used to quantize the model weights, "
"options are \"marlin\", \"awq\", \"gptq\", \"squeezellm\", \"smoothquant\""
)
help="The method used to quantize the model weights, options are "
"\"marlin\", \"awq\", \"gptq\", \"squeezellm\", \"smoothquant\"")
parser.add_argument("--dtype",
type=str,
default='auto',
@@ -233,7 +234,7 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
"--max-model-len",
type=int,
default=None,
help=f"Maximum length of a sequence (including prompt and output)")
help="Maximum length of a sequence (including prompt and output)")
parser.add_argument(
"--max-num-batched-tokens",
type=int,
neuralmagic/tools/profiler/print_table.py: 3 additions & 3 deletions
@@ -1,10 +1,10 @@
import argparse
import json

from vllm.profiler.nm_profile import SummaryStatsEntry, ModelStatsEntry
from vllm.profiler.utils import indent_string, TablePrinter
from typing import Dict

from vllm.profiler.nm_profile import ModelStatsEntry, SummaryStatsEntry
from vllm.profiler.utils import TablePrinter, indent_string


def flatten_entries(entry_cls, profile_dict: Dict):
entries_and_depth = []
neuralmagic/tools/profiler/visualize_trace.py: 8 additions & 8 deletions
@@ -1,7 +1,8 @@
import argparse
import json
import pandas as pd

import matplotlib.pyplot as plt
import pandas as pd


def trim_string_back(string: str, width: int):
@@ -198,12 +199,11 @@ def plot_metric(metric: str, ax, add_totals=False):
shorten_plot_legend_strings(legend, 50)

context = profile_data["context"]
plt.suptitle(
f"{context['model']}\n"
f"Batch={context['batch_size']}, "
f"PromptLen={context['prompt_len']}, "
f"NumGpus={context['tensor_parallel_size']}"
f"{', Sparsity ' + context['sparsity'] if context.get('sparsity', None) else ''}"
)
sparsity = context.get('sparsity', None)
plt.suptitle(f"{context['model']}\n"
f"Batch={context['batch_size']}, "
f"PromptLen={context['prompt_len']}, "
f"NumGpus={context['tensor_parallel_size']}"
f"{', Sparsity ' + sparsity if sparsity else ''}")
plt.savefig(output, bbox_inches='tight')
print("Created: ", output)
