feat: add `benchmarks_entrypoint.py` (#34495)

* feat: add `benchmarks_entrypoint.py`

  Adding `benchmarks_entrypoint.py` file, which will be run from the benchmarks CI. This Python script will list all Python files from the `benchmark/` folder and run the included `run_benchmark` function, allowing people to add new benchmark scripts.

* feat: add `MetricsRecorder`
* feat: update dashboard
* fix: add missing arguments to `MetricsRecorder`
* feat: update dash & add datasource + `default.yml`
* fix: move responsibility to create `MetricsRecorder` in bench script
* fix: update incorrect datasource UID
* fix: incorrect variable values
* debug: benchmark entrypoint script
* refactor: update log level
* fix: update broken import
* feat: add debug log in `MetricsRecorder`
* debug: set log level to debug
* fix: set connection `autocommit` to `True`
Showing 8 changed files with 334 additions and 169 deletions.
@@ -0,0 +1,49 @@
# Benchmarks

You might want to add new benchmarks.

You will need to define a Python function named `run_benchmark` in your Python file, and the file must be located in this `benchmark/` directory.

The expected function signature is the following:

```py
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
```
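For example, a complete (if trivial) benchmark file could look like the sketch below. The file name `my_benchmark.py` and the timing logic are hypothetical; only the `run_benchmark` signature comes from the contract above.

```py
# hypothetical benchmark/my_benchmark.py -- illustrative only
from logging import Logger
import time


def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
    logger.info(f"running my_benchmark on {branch}@{commit_id}")
    start = time.perf_counter()
    # load a model and generate num_tokens_to_generate tokens here
    elapsed = time.perf_counter() - start
    logger.info(f"my_benchmark finished in {elapsed:.2f}s")
```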
## Writing metrics to the database

`MetricsRecorder` is thread-safe in the sense of the Python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread): you can start a background thread to collect the device measurements without blocking the main thread while it runs the model measurements.

See [`llama.py`](./llama.py) for an example of this in practice.
```py
from benchmarks_entrypoint import MetricsRecorder
import psycopg2


def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
    metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
    benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
    # To collect device measurements
    metrics_recorder.collect_device_measurements(
        benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
    )
    # To collect your model measurements
    metrics_recorder.collect_model_measurements(
        benchmark_id,
        {
            "model_load_time": model_load_time,
            "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
            "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
            "first_eager_generate_time_secs": first_eager_generate_time,
            "second_eager_generate_time_secs": second_eager_generate_time,
            "time_to_first_token_secs": time_to_first_token,
            "time_to_second_token_secs": time_to_second_token,
            "time_to_third_token_secs": time_to_third_token,
            "time_to_next_token_mean_secs": mean_time_to_next_token,
            "first_compile_generate_time_secs": first_compile_generate_time,
            "second_compile_generate_time_secs": second_compile_generate_time,
            "third_compile_generate_time_secs": third_compile_generate_time,
            "fourth_compile_generate_time_secs": fourth_compile_generate_time,
        },
    )
```
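Because `MetricsRecorder` is thread-safe, the device readings can be sampled from a background thread while the main thread runs the model. The sketch below assumes `psutil` for the CPU/memory readings; the GPU values are placeholders and the 0.5 s sampling interval is an arbitrary choice, neither comes from this commit.

```py
import threading
from logging import Logger

import psutil
import psycopg2

from benchmarks_entrypoint import MetricsRecorder


def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
    metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
    benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": "placeholder-gpu", "model_id": "placeholder/model"})
    stop_sampling = threading.Event()

    def sample_device_metrics():
        # Poll device utilisation until the main thread signals completion.
        while not stop_sampling.is_set():
            cpu_util = psutil.cpu_percent()
            mem_megabytes = psutil.virtual_memory().used / (1024 * 1024)
            gpu_util, gpu_mem_megabytes = 0, 0  # replace with real GPU readings (e.g. gpustat / pynvml)
            metrics_recorder.collect_device_measurements(
                benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
            )
            stop_sampling.wait(0.5)

    sampler = threading.Thread(target=sample_device_metrics, daemon=True)
    sampler.start()
    try:
        # run and time the model here, then record the results, e.g.:
        # metrics_recorder.collect_model_measurements(benchmark_id, {"model_load_time": model_load_time, ...})
        pass
    finally:
        stop_sampling.set()
        sampler.join()
        metrics_recorder.close()
```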
@@ -0,0 +1,144 @@
import argparse
import importlib.util
import logging
import os
from typing import Dict
import psycopg2
import sys

from psycopg2.extras import Json
from psycopg2.extensions import register_adapter


register_adapter(dict, Json)


class ImportModuleException(Exception):
    pass


class MetricsRecorder:
    def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
        self.conn = connection
        self.conn.autocommit = True
        self.logger = logger
        self.branch = branch
        self.commit_id = commit_id
        self.commit_msg = commit_msg

    def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
        """
        Creates a new benchmark, returns the benchmark id
        """
        # gpu_name: str, model_id: str
        with self.conn.cursor() as cur:
            cur.execute(
                "INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
                (self.branch, self.commit_id, self.commit_msg, metadata),
            )
            benchmark_id = cur.fetchone()[0]
logger.debug(f"initialised benchmark #{benchmark_id}") | ||
            return benchmark_id

    def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
        """
        Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
        """
        with self.conn.cursor() as cur:
            cur.execute(
                "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
                (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
            )
        self.logger.debug(
            f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
        )

    def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
        """
        Collect model metrics (load time, generation latencies, ...) as a single JSON payload.
        """
        with self.conn.cursor() as cur:
            cur.execute(
                """
                INSERT INTO model_measurements (
                    benchmark_id,
                    measurements
                ) VALUES (%s, %s)
                """,
                (
                    benchmark_id,
                    measurements,
                ),
            )
        self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")

    def close(self):
        self.conn.close()


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)


def parse_arguments():
    """
    Parse command line arguments for the benchmarking CLI.
    """
    parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")

    parser.add_argument(
        "branch",
        type=str,
        help="The branch name on which the benchmarking is performed.",
    )

    parser.add_argument(
        "commit_id",
        type=str,
        help="The commit hash on which the benchmarking is performed.",
    )

    parser.add_argument(
        "commit_msg",
        type=str,
        help="The commit message associated with the commit, truncated to 70 characters.",
    )

    args = parser.parse_args()

    return args.branch, args.commit_id, args.commit_msg


def import_from_path(module_name, file_path):
    try:
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module
        spec.loader.exec_module(module)
        return module
    except Exception as e:
        raise ImportModuleException(f"failed to load python module: {e}")


if __name__ == "__main__":
    benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))

    branch, commit_id, commit_msg = parse_arguments()

    for entry in os.scandir(benchmarks_folder_path):
        try:
            if not entry.name.endswith(".py"):
                continue
            if entry.path == __file__:
                continue
            logger.debug(f"loading: {entry.name}")
            module = import_from_path(entry.name.split(".")[0], entry.path)
logger.info(f"runnning benchmarks in: {entry.name}") | ||
            module.run_benchmark(logger, branch, commit_id, commit_msg)
        except ImportModuleException as e:
            logger.error(e)
        except Exception as e:
            logger.error(f"error running benchmarks for {entry.name}: {e}")
@@ -0,0 +1,10 @@
apiVersion: 1

providers:
  - name: 'Transformers Benchmarks'
    orgId: 1
    type: file
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /etc/grafana/dashboards