diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index eaa4b3b2f82456..1bbd1c1e94d08c 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -63,7 +63,7 @@ jobs: commit_id=$GITHUB_SHA fi commit_msg=$(git show -s --format=%s | cut -c1-70) - python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg" + python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg" env: HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} # Enable this to see debug logs diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 00000000000000..a827da444f0801 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,49 @@ +# Benchmarks + +You might want to add new benchmarks. + +You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory. + +The expected function signature is the following: + +```py +def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100): +``` + +## Writing metrics to the database + +`MetricRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements. + +cf [`llama.py`](./llama.py) to see an example of this in practice. + +```py +from benchmarks_entrypoint import MetricsRecorder +import psycopg2 + +def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100): + metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg) + benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id}) + # To collect device measurements + metrics_recorder.collect_device_measurements( + benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes + ) + # To collect your model measurements + metrics_recorder.collect_model_measurements( + benchmark_id, + { + "model_load_time": model_load_time, + "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time, + "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time, + "first_eager_generate_time_secs": first_eager_generate_time, + "second_eager_generate_time_secs": second_eager_generate_time, + "time_to_first_token_secs": time_to_first_token, + "time_to_second_token_secs": time_to_second_token, + "time_to_third_token_secs": time_to_third_token, + "time_to_next_token_mean_secs": mean_time_to_next_token, + "first_compile_generate_time_secs": first_compile_generate_time, + "second_compile_generate_time_secs": second_compile_generate_time, + "third_compile_generate_time_secs": third_compile_generate_time, + "fourth_compile_generate_time_secs": fourth_compile_generate_time, + }, + ) +``` diff --git a/benchmark/benchmarks_entrypoint.py b/benchmark/benchmarks_entrypoint.py new file mode 100644 index 00000000000000..7925e2902834f7 --- /dev/null +++ b/benchmark/benchmarks_entrypoint.py @@ -0,0 +1,144 @@ +import argparse +import importlib.util +import logging +import os +from typing import Dict +import psycopg2 +import sys + +from psycopg2.extras import Json +from psycopg2.extensions import register_adapter + + +register_adapter(dict, Json) + + +class ImportModuleException(Exception): + pass + + +class MetricsRecorder: + def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str): + self.conn = connection + self.conn.autocommit = True + self.logger = logger + self.branch = branch + self.commit_id = commit_id + self.commit_msg = commit_msg + + def initialise_benchmark(self, metadata: Dict[str, str]) -> int: + """ + Creates a new benchmark, returns the benchmark id + """ + # gpu_name: str, model_id: str + with self.conn.cursor() as cur: + cur.execute( + "INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id", + (self.branch, self.commit_id, self.commit_msg, metadata), + ) + benchmark_id = cur.fetchone()[0] + logger.debug(f"initialised benchmark #{benchmark_id}") + return benchmark_id + + def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes): + """ + Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function. + """ + with self.conn.cursor() as cur: + cur.execute( + "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)", + (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes), + ) + self.logger.debug( + f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]" + ) + + def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]): + with self.conn.cursor() as cur: + cur.execute( + """ + INSERT INTO model_measurements ( + benchmark_id, + measurements + ) VALUES (%s, %s) + """, + ( + benchmark_id, + measurements, + ), + ) + self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}") + + def close(self): + self.conn.close() + + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.INFO) +formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s") +handler.setFormatter(formatter) +logger.addHandler(handler) + + +def parse_arguments(): + """ + Parse command line arguments for the benchmarking CLI. + """ + parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.") + + parser.add_argument( + "branch", + type=str, + help="The branch name on which the benchmarking is performed.", + ) + + parser.add_argument( + "commit_id", + type=str, + help="The commit hash on which the benchmarking is performed.", + ) + + parser.add_argument( + "commit_msg", + type=str, + help="The commit message associated with the commit, truncated to 70 characters.", + ) + + args = parser.parse_args() + + return args.branch, args.commit_id, args.commit_msg + + +def import_from_path(module_name, file_path): + try: + spec = importlib.util.spec_from_file_location(module_name, file_path) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + except Exception as e: + raise ImportModuleException(f"failed to load python module: {e}") + + +if __name__ == "__main__": + benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__)) + + branch, commit_id, commit_msg = parse_arguments() + + for entry in os.scandir(benchmarks_folder_path): + try: + if not entry.name.endswith(".py"): + continue + if entry.path == __file__: + continue + logger.debug(f"loading: {entry.name}") + module = import_from_path(entry.name.split(".")[0], entry.path) + logger.info(f"runnning benchmarks in: {entry.name}") + module.run_benchmark(logger, branch, commit_id, commit_msg) + except ImportModuleException as e: + logger.error(e) + except Exception as e: + logger.error(f"error running benchmarks for {entry.name}: {e}") diff --git a/benchmark/default.yml b/benchmark/default.yml new file mode 100644 index 00000000000000..f3f02cab34d1bd --- /dev/null +++ b/benchmark/default.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +providers: + - name: 'Transformers Benchmarks' + orgId: 1 + type: file + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/dashboards diff --git a/benchmark/grafana_dashboard.json b/benchmark/grafana_dashboard.json index 3d579f7b368711..caaec78a522303 100644 --- a/benchmark/grafana_dashboard.json +++ b/benchmark/grafana_dashboard.json @@ -30,7 +30,7 @@ "title": "Go to data", "tooltip": "Go to data", "type": "link", - "url": "http://transformers-benchmarks.huggingface.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}" + "url": "http://transformers-benchmarks.hf.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}" } ], "liveNow": true, @@ -77,7 +77,7 @@ "properties": [ { "id": "custom.width", - "value": 196 + "value": 202 } ] }, @@ -101,7 +101,7 @@ "properties": [ { "id": "custom.width", - "value": 581 + "value": 524 } ] }, @@ -113,7 +113,19 @@ "properties": [ { "id": "custom.width", - "value": 379 + "value": 353 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "model_id" + }, + "properties": [ + { + "id": "custom.width", + "value": 216 } ] } @@ -143,12 +155,14 @@ "targets": [ { "datasource": { - "type": "grafana-postgresql-datasource" + "default": true, + "type": "grafana-postgresql-datasource", + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT commit_id as commit_id, commit_message, gpu_name, created_at AS date FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT commit_id, commit_message, metadata->>'gpu_name' as gpu_name, metadata->>'model_id' as model_id, created_at AS date FROM benchmarks WHERE branch = '${branch}' AND metadata->>'gpu_name' = '${gpu_name}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -306,13 +320,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -431,13 +446,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -565,13 +581,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -686,13 +703,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -807,13 +825,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -928,13 +947,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -1062,13 +1082,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -1183,13 +1204,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -1304,13 +1326,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -1425,13 +1448,14 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", "rawQuery": true, - "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", + "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};", "refId": "A", "sql": { "columns": [ @@ -1480,11 +1504,7 @@ "id": 15, "panels": [ { - "datasource": { - "default": true, - "type": "grafana-postgresql-datasource", - "uid": "be28nkzirtb0gd" - }, + "datasource": {}, "fieldConfig": { "defaults": { "color": { @@ -1528,8 +1548,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1563,8 +1582,9 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", @@ -1665,11 +1685,7 @@ "type": "timeseries" }, { - "datasource": { - "default": true, - "type": "grafana-postgresql-datasource", - "uid": "be28nkzirtb0gd" - }, + "datasource": {}, "fieldConfig": { "defaults": { "color": { @@ -1713,8 +1729,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1748,8 +1763,9 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", @@ -1850,11 +1866,7 @@ "type": "timeseries" }, { - "datasource": { - "default": true, - "type": "grafana-postgresql-datasource", - "uid": "be28nkzirtb0gd" - }, + "datasource": {}, "fieldConfig": { "defaults": { "color": { @@ -1898,8 +1910,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1933,8 +1944,9 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", @@ -2035,11 +2047,7 @@ "type": "timeseries" }, { - "datasource": { - "default": true, - "type": "grafana-postgresql-datasource", - "uid": "be28nkzirtb0gd" - }, + "datasource": {}, "fieldConfig": { "defaults": { "color": { @@ -2083,8 +2091,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2118,8 +2125,9 @@ "targets": [ { "datasource": { + "default": true, "type": "grafana-postgresql-datasource", - "uid": "bdz2yss7sxo1sc" + "uid": "be28nkzirtb0gd" }, "editorMode": "code", "format": "table", @@ -2224,7 +2232,6 @@ "type": "row" } ], - "refresh": "", "schemaVersion": 39, "tags": [], "templating": { @@ -2236,6 +2243,7 @@ "value": "main" }, "datasource": { + "default": true, "type": "grafana-postgresql-datasource", "uid": "be28nkzirtb0gd" }, @@ -2248,7 +2256,7 @@ "name": "branch", "options": [], "query": "SELECT DISTINCT branch FROM benchmarks;", - "refresh": 2, + "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, @@ -2261,6 +2269,7 @@ "value": "1729701492845" }, "datasource": { + "default": true, "type": "grafana-postgresql-datasource", "uid": "be28nkzirtb0gd" }, @@ -2281,10 +2290,11 @@ { "current": { "selected": false, - "text": "1730120430069", - "value": "1730120430069" + "text": "1730393397577", + "value": "1730393397577" }, "datasource": { + "default": true, "type": "grafana-postgresql-datasource", "uid": "be28nkzirtb0gd" }, @@ -2312,15 +2322,16 @@ "type": "grafana-postgresql-datasource", "uid": "be28nkzirtb0gd" }, - "definition": "SELECT DISTINCT gpu_name FROM benchmarks;", + "definition": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;", + "description": "", "hide": 0, "includeAll": false, "label": "GPU", "multi": false, "name": "gpu_name", "options": [], - "query": "SELECT DISTINCT gpu_name FROM benchmarks;", - "refresh": 2, + "query": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;", + "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, @@ -2328,7 +2339,7 @@ }, { "current": { - "selected": false, + "selected": true, "text": "10", "value": "10" }, @@ -2359,6 +2370,6 @@ "timezone": "browser", "title": "Transformers benchmarks", "uid": "fdz33iyzln9c0a", - "version": 4, + "version": 10, "weekStart": "" } diff --git a/benchmark/grafana_datasource.yaml b/benchmark/grafana_datasource.yaml new file mode 100644 index 00000000000000..25f36254104ab5 --- /dev/null +++ b/benchmark/grafana_datasource.yaml @@ -0,0 +1,17 @@ +apiVersion: 1 +datasources: + - name: grafana-postgresql-datasource + uid: be28nkzirtb0gd + type: postgres + url: $GRAFANA_POSTGRES_DATASOURCE_URL + user: $GRAFANA_POSTGRES_DATASOURCE_USER + secureJsonData: + password: $GRAFANA_POSTGRES_DATASOURCE_PWD + jsonData: + database: metrics + maxOpenConns: 100 + maxIdleConns: 100 + maxIdleConnsAuto: true + connMaxLifetime: 14400 + postgresVersion: 1000 + timescaledb: false diff --git a/benchmark/init_db.sql b/benchmark/init_db.sql index 573cc11518e857..a7864c4af183b6 100644 --- a/benchmark/init_db.sql +++ b/benchmark/init_db.sql @@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS benchmarks ( branch VARCHAR(255), commit_id VARCHAR(72), commit_message VARCHAR(70), - gpu_name VARCHAR(255), + metadata jsonb, created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC') ); diff --git a/benchmark/llama.py b/benchmark/llama.py index 4a2c57422e6ffb..bbe1afefd5ef1b 100644 --- a/benchmark/llama.py +++ b/benchmark/llama.py @@ -1,71 +1,25 @@ -import argparse -import json -import logging +from logging import Logger import os -import sys -from statistics import mean from threading import Event, Thread from time import perf_counter, sleep from typing import Optional +from benchmarks_entrypoint import MetricsRecorder import gpustat import psutil import psycopg2 import torch from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache -from psycopg2.extras import Json -from psycopg2.extensions import register_adapter os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -handler = logging.StreamHandler(sys.stdout) -handler.setLevel(logging.INFO) -formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s") -handler.setFormatter(formatter) -logger.addHandler(handler) - os.environ["TOKENIZERS_PARALLELISM"] = "1" torch.set_float32_matmul_precision("high") -register_adapter(dict, Json) - - -def parse_arguments(): - """ - Parse command line arguments for the benchmarking CLI. - """ - parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.") - - parser.add_argument( - "branch", - type=str, - help="The branch name on which the benchmarking is performed.", - ) - - parser.add_argument( - "commit_id", - type=str, - help="The commit hash on which the benchmarking is performed.", - ) - parser.add_argument( - "commit_msg", - type=str, - help="The commit message associated with the commit, truncated to 70 characters.", - ) - args = parser.parse_args() - - return args.branch, args.commit_id, args.commit_msg - - -def collect_metrics(benchmark_id, continue_metric_collection): +def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder): p = psutil.Process(os.getpid()) - conn = psycopg2.connect("dbname=metrics") - cur = conn.cursor() while not continue_metric_collection.is_set(): with p.oneshot(): cpu_util = p.cpu_percent() @@ -73,47 +27,41 @@ def collect_metrics(benchmark_id, continue_metric_collection): gpu_stats = gpustat.GPUStatCollection.new_query() gpu_util = gpu_stats[0]["utilization.gpu"] gpu_mem_megabytes = gpu_stats[0]["memory.used"] - cur.execute( - "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)", - (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes), + metrics_recorder.collect_device_measurements( + benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes ) sleep(0.01) - conn.commit() - conn.close() -def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100): +def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100): continue_metric_collection = Event() metrics_thread = None + model_id = "meta-llama/Llama-2-7b-hf" + metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg) try: gpu_stats = gpustat.GPUStatCollection.new_query() gpu_name = gpu_stats[0]["name"] - conn = psycopg2.connect("dbname=metrics") - cur = conn.cursor() - cur.execute( - "INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name) VALUES (%s, %s, %s, %s) RETURNING benchmark_id", - (branch, commit_id, commit_msg, gpu_name), + benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id}) + logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}") + metrics_thread = Thread( + target=collect_metrics, + args=[benchmark_id, continue_metric_collection, metrics_recorder], ) - conn.commit() - benchmark_id = cur.fetchone()[0] - logger.info(f"running benchmark #{benchmark_id} on {gpu_name}") - metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection]) metrics_thread.start() logger.info("started background thread to fetch device metrics") os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling device = "cuda" - ckpt = "meta-llama/Llama-2-7b-hf" logger.info("downloading weights") # This is to avoid counting download in model load time measurement - model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16) + model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16) gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1) logger.info("loading model") start = perf_counter() model = AutoModelForCausalLM.from_pretrained( - ckpt, torch_dtype=torch.float16, generation_config=gen_config + model_id, torch_dtype=torch.float16, generation_config=gen_config ).eval() model.to(device) torch.cuda.synchronize() @@ -121,7 +69,7 @@ def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_ge model_load_time = end - start logger.info(f"loaded model in: {model_load_time}s") - tokenizer = AutoTokenizer.from_pretrained(ckpt) + tokenizer = AutoTokenizer.from_pretrained(model_id) prompt = "Why dogs are so cute?" inputs = tokenizer(prompt, return_tensors="pt").to(device) @@ -368,41 +316,27 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s") logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") - cur.execute( - """ - INSERT INTO model_measurements ( - benchmark_id, - measurements - ) VALUES (%s, %s) - """, - ( - benchmark_id, - { - "model_load_time": model_load_time, - "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time, - "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time, - "first_eager_generate_time_secs": first_eager_generate_time, - "second_eager_generate_time_secs": second_eager_generate_time, - "time_to_first_token_secs": time_to_first_token, - "time_to_second_token_secs": time_to_second_token, - "time_to_third_token_secs": time_to_third_token, - "time_to_next_token_mean_secs": mean_time_to_next_token, - "first_compile_generate_time_secs": first_compile_generate_time, - "second_compile_generate_time_secs": second_compile_generate_time, - "third_compile_generate_time_secs": third_compile_generate_time, - "fourth_compile_generate_time_secs": fourth_compile_generate_time, - }, - ), + metrics_recorder.collect_model_measurements( + benchmark_id, + { + "model_load_time": model_load_time, + "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time, + "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time, + "first_eager_generate_time_secs": first_eager_generate_time, + "second_eager_generate_time_secs": second_eager_generate_time, + "time_to_first_token_secs": time_to_first_token, + "time_to_second_token_secs": time_to_second_token, + "time_to_third_token_secs": time_to_third_token, + "time_to_next_token_mean_secs": mean_time_to_next_token, + "first_compile_generate_time_secs": first_compile_generate_time, + "second_compile_generate_time_secs": second_compile_generate_time, + "third_compile_generate_time_secs": third_compile_generate_time, + "fourth_compile_generate_time_secs": fourth_compile_generate_time, + }, ) - conn.commit() - conn.close() except Exception as e: logger.error(f"Caught exception: {e}") continue_metric_collection.set() if metrics_thread is not None: metrics_thread.join() - - -if __name__ == "__main__": - branch, commit_id, commit_msg = parse_arguments() - run_benchmark(branch, commit_id, commit_msg, num_tokens_to_generate=20) + metrics_recorder.close()