From 2ee94e3836e0bcec591cb3dc252c5bd3fa3dd1e8 Mon Sep 17 00:00:00 2001 From: Shwetank Singh Date: Mon, 2 Dec 2024 11:51:58 +0000 Subject: [PATCH] #15587: yolo perf ci --- models/demos/yolov4/tests/test_perf_yolo.py | 115 ++++++++++++++++++++ tests/scripts/run_performance.sh | 4 + 2 files changed, 119 insertions(+) create mode 100644 models/demos/yolov4/tests/test_perf_yolo.py diff --git a/models/demos/yolov4/tests/test_perf_yolo.py b/models/demos/yolov4/tests/test_perf_yolo.py new file mode 100644 index 00000000000..f04566ebbd1 --- /dev/null +++ b/models/demos/yolov4/tests/test_perf_yolo.py @@ -0,0 +1,115 @@ +# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. + +# SPDX-License-Identifier: Apache-2.0 + +import torch +import pytest +import ttnn +import time +import os + +from torchvision import models +from loguru import logger +import ttnn +from ttnn.model_preprocessing import preprocess_model_parameters +from models.demos.yolov4.ttnn.yolov4 import TtYOLOv4 +from models.utility_functions import ( + enable_persistent_kernel_cache, + disable_persistent_kernel_cache, +) +from models.perf.perf_utils import prep_perf_report +from models.perf.device_perf_utils import run_device_perf, check_device_perf, prep_device_perf_report +from models.utility_functions import is_grayskull + + +def get_expected_times(): + return (40, 16) + + +@pytest.mark.models_performance_bare_metal +@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576}], indirect=True) +@pytest.mark.parametrize( + "input_shape", + [ + (1, 320, 320, 3), + ], +) +def test_yolov4( + device, + input_shape, + model_location_generator, +): + disable_persistent_kernel_cache() + model_path = model_location_generator("models", model_subdir="Yolo") + batch_size = input_shape[0] + + if model_path == "models": + if not os.path.exists("tests/ttnn/integration_tests/yolov4/yolov4.pth"): # check if yolov4.pth is available + os.system( + "tests/ttnn/integration_tests/yolov4/yolov4_weights_download.sh" + ) # execute the 
yolov4_weights_download.sh file + + weights_pth = "tests/ttnn/integration_tests/yolov4/yolov4.pth" + else: + weights_pth = str(model_path / "yolov4.pth") + ttnn_model = TtYOLOv4(weights_pth) + + torch_input_tensor = torch.rand(input_shape, dtype=torch.bfloat16) + + durations = [] + for i in range(2): + tt_input_tensor = ttnn.from_torch(torch_input_tensor, ttnn.bfloat16) + start = time.time() + ttnn_output = ttnn_model(device, tt_input_tensor) + end = time.time() + durations.append(end - start) + enable_persistent_kernel_cache() + + inference_and_compile_time, inference_time, *_ = durations + + expected_compile_time, expected_inference_time = get_expected_times() + prep_perf_report( + model_name="yolov4", + batch_size=batch_size, + inference_and_compile_time=inference_and_compile_time, + inference_time=inference_time, + expected_compile_time=expected_compile_time, + expected_inference_time=expected_inference_time, + comments="", + inference_time_cpu=0.0, + ) + + logger.info(f"Compile time: {inference_and_compile_time - inference_time}") + logger.info(f"Inference time: {inference_time}") + logger.info(f"Samples per second: {1 / inference_time * batch_size}") + + +@pytest.mark.parametrize( + "batch_size, model_name", + [ + (1, "yolov4"), + ], +) +@pytest.mark.models_device_performance_bare_metal +def test_perf_device_bare_metal_yolov4(batch_size, model_name): + subdir = model_name + num_iterations = 1 + margin = 0.03 + + expected_perf = 197.89 + command = f"pytest tests/ttnn/integration_tests/yolov4/test_ttnn_yolov4.py" + + cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"] + + inference_time_key = "AVG DEVICE KERNEL SAMPLES/S" + expected_perf_cols = {inference_time_key: expected_perf} + + post_processed_results = run_device_perf(command, subdir, num_iterations, cols, batch_size) + expected_results = check_device_perf(post_processed_results, margin, expected_perf_cols) + prep_device_perf_report( + model_name=f"ttnn_functional_{model_name}_{batch_size}", + 
batch_size=batch_size, + post_processed_results=post_processed_results, + expected_results=expected_results, + comments="", + ) diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh index 5acab7c6def..cfeb37aabc5 100755 --- a/tests/scripts/run_performance.sh +++ b/tests/scripts/run_performance.sh @@ -19,6 +19,8 @@ run_perf_models_other() { env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/resnet50/tests/test_perf_e2e_resnet50.py -m $test_marker env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/bert_tiny/tests/test_performance.py -m $test_marker + + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/yolov4/tests/test_perf_yolo.py -m $test_marker fi env pytest -n auto tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker @@ -132,6 +134,8 @@ run_device_perf_models() { env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b_common/tests -m $test_marker env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/bert_tiny/tests -m $test_marker + + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/yolov4/tests/ -m $test_marker fi ## Merge all the generated reports