From a4e579dde6f250500d542ddf1efc74e540a22905 Mon Sep 17 00:00:00 2001
From: Pavle Josipovic
Date: Sun, 15 Dec 2024 21:41:30 +0000
Subject: [PATCH] Fix yolov4 e2e perf measurement

---
 models/demos/yolov4/tests/test_perf_yolo.py | 56 ++++++++++++++++-----
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/models/demos/yolov4/tests/test_perf_yolo.py b/models/demos/yolov4/tests/test_perf_yolo.py
index a9e0009e6f5b..7be57629d638 100644
--- a/models/demos/yolov4/tests/test_perf_yolo.py
+++ b/models/demos/yolov4/tests/test_perf_yolo.py
@@ -20,10 +20,17 @@
 from models.perf.perf_utils import prep_perf_report
 from models.perf.device_perf_utils import run_device_perf, check_device_perf, prep_device_perf_report
 from models.utility_functions import is_grayskull
+from models.utility_functions import (
+    profiler,
+)
+
+
+def get_expected_compile_time_sec():
+    return 26
 
 
-def get_expected_times():
-    return (40, 16.2)
+def get_expected_inference_time_sec():
+    return 0.23
 
 
 @pytest.mark.models_performance_bare_metal
@@ -40,6 +47,7 @@ def test_yolov4(
     model_location_generator,
 ):
     disable_persistent_kernel_cache()
+    profiler.clear()
 
     model_path = model_location_generator("models", model_subdir="Yolo")
     batch_size = input_shape[0]
@@ -55,19 +63,41 @@ def test_yolov4(
     ttnn_model = TtYOLOv4(weights_pth)
 
     torch_input_tensor = torch.rand(input_shape, dtype=torch.bfloat16)
+    ttnn_input = ttnn.from_torch(torch_input_tensor, ttnn.bfloat16)
+
+    logger.info("Compiling model with warmup run")
+    profiler.start("inference_and_compile_time")
+    out1, out2, out3 = ttnn_model(device, ttnn_input)
+    profiler.end("inference_and_compile_time")
+
+    inference_and_compile_time = profiler.get("inference_and_compile_time")
+    logger.info(f"Model compiled with warmup run in {(inference_and_compile_time):.2f} s")
+
+    iterations = 16
+    outputs = []
+    logger.info(f"Running inference for {iterations} iterations")
+    for idx in range(iterations):
+        profiler.start("inference_time")
+        profiler.start(f"inference_time_{idx}")
+        out1, out2, out3 = ttnn_model(device, ttnn_input)
+        outputs.append(ttnn.from_device(out1, blocking=False))
+        outputs.append(ttnn.from_device(out2, blocking=False))
+        outputs.append(ttnn.from_device(out3, blocking=False))
+        profiler.end(f"inference_time_{idx}")
+        profiler.end("inference_time")
+
+    mean_inference_time = profiler.get("inference_time")
+    inference_time = profiler.get(f"inference_time_{iterations - 1}")
+    compile_time = inference_and_compile_time - inference_time
+    logger.info(f"Model compilation took {compile_time:.1f} s")
+    logger.info(f"Inference on the last iteration completed in {(inference_time * 1000.0):.2f} ms")
+    logger.info(
+        f"Mean inference time for {batch_size} (batch) images was {(mean_inference_time * 1000.0):.2f} ms ({batch_size / mean_inference_time:.2f} fps)"
+    )
 
-    durations = []
-    for i in range(2):
-        tt_input_tensor = ttnn.from_torch(torch_input_tensor, ttnn.bfloat16)
-        start = time.time()
-        ttnn_output = ttnn_model(device, tt_input_tensor)
-        end = time.time()
-        durations.append(end - start)
-        enable_persistent_kernel_cache()
-
-    inference_and_compile_time, inference_time, *_ = durations
+    expected_compile_time = get_expected_compile_time_sec()
+    expected_inference_time = get_expected_inference_time_sec()
 
-    expected_compile_time, expected_inference_time = get_expected_times()
     prep_perf_report(
         model_name="yolov4",
         batch_size=batch_size,
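
Aside (not part of the patch): the measurement pattern above runs one untimed
warmup call that absorbs the one-time compile cost, then 16 timed iterations
whose mean gives the steady-state latency; compile time is estimated by
subtracting the last iteration's latency from the warmup run. Below is a
minimal self-contained sketch of the same idea. `fake_model` and `measure`
are hypothetical stand-ins for illustration only: `fake_model` replaces
`ttnn_model(device, ttnn_input)`, and `time.perf_counter()` replaces
`models.utility_functions.profiler`.

import time


def fake_model(x):
    # Hypothetical stand-in for ttnn_model(device, ttnn_input);
    # the sleep mimics per-run device work.
    time.sleep(0.01)
    return x


def measure(model, x, iterations=16):
    # Warmup run: pays the one-time compile cost along with one inference,
    # like the "inference_and_compile_time" block in the patch.
    t0 = time.perf_counter()
    model(x)
    inference_and_compile_time = time.perf_counter() - t0

    # Steady-state runs, mirroring the patch's 16-iteration loop.
    per_iter = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        model(x)
        per_iter.append(time.perf_counter() - t0)

    mean_inference_time = sum(per_iter) / len(per_iter)
    # Estimate compile time the way the patch does: first run minus
    # one steady-state iteration.
    compile_time = inference_and_compile_time - per_iter[-1]
    return compile_time, mean_inference_time


if __name__ == "__main__":
    compile_time, mean_time = measure(fake_model, object())
    print(f"compile ~{compile_time:.3f} s, mean inference {mean_time * 1000:.2f} ms")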