Skip to content

Commit

Permalink
Fix yolo4 e2e perf measurement
Browse files Browse the repository at this point in the history
  • Loading branch information
Pavle Josipovic committed Dec 15, 2024
1 parent 201eff7 commit a4e579d
Showing 1 changed file with 43 additions and 13 deletions.
56 changes: 43 additions & 13 deletions models/demos/yolov4/tests/test_perf_yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,17 @@
from models.perf.perf_utils import prep_perf_report
from models.perf.device_perf_utils import run_device_perf, check_device_perf, prep_device_perf_report
from models.utility_functions import is_grayskull
from models.utility_functions import (
profiler,
)


def get_expected_compile_time_sec():
    """Return the expected model compile-time budget, in seconds."""
    expected_compile_seconds = 26
    return expected_compile_seconds

def get_expected_times():
    """Return the expected (compile_time_s, inference_time_s) pair."""
    compile_time_s = 40
    inference_time_s = 16.2
    return compile_time_s, inference_time_s

def get_expected_inference_time_sec():
    """Return the expected per-run inference-time budget, in seconds."""
    expected_inference_seconds = 0.23
    return expected_inference_seconds


@pytest.mark.models_performance_bare_metal
Expand All @@ -40,6 +47,7 @@ def test_yolov4(
model_location_generator,
):
disable_persistent_kernel_cache()
profiler.clear()
model_path = model_location_generator("models", model_subdir="Yolo")
batch_size = input_shape[0]

Expand All @@ -55,19 +63,41 @@ def test_yolov4(
ttnn_model = TtYOLOv4(weights_pth)

torch_input_tensor = torch.rand(input_shape, dtype=torch.bfloat16)
ttnn_input = ttnn.from_torch(torch_input_tensor, ttnn.bfloat16)

logger.info(f"Compiling model with warmup run")
profiler.start(f"inference_and_compile_time")
out1, out2, out3 = ttnn_model(device, ttnn_input)
profiler.end(f"inference_and_compile_time")

inference_and_compile_time = profiler.get("inference_and_compile_time")
logger.info(f"Model compiled with warmup run in {(inference_and_compile_time):.2f} s")

iterations = 16
outputs = []
logger.info(f"Running inference for {iterations} iterations")
for idx in range(iterations):
profiler.start("inference_time")
profiler.start(f"inference_time_{idx}")
out1, out2, out3 = ttnn_model(device, ttnn_input)
outputs.append(ttnn.from_device(out1, blocking=False))
outputs.append(ttnn.from_device(out2, blocking=False))
outputs.append(ttnn.from_device(out3, blocking=False))
profiler.end(f"inference_time_{idx}")
profiler.end("inference_time")

mean_inference_time = profiler.get("inference_time")
inference_time = profiler.get(f"inference_time_{iterations - 1}")
compile_time = inference_and_compile_time - inference_time
logger.info(f"Model compilation took {compile_time:.1f} s")
logger.info(f"Inference time on last iterations was completed in {(inference_time * 1000.0):.2f} ms")
logger.info(
f"Mean inference time for {batch_size} (batch) images was {(mean_inference_time * 1000.0):.2f} ms ({batch_size / mean_inference_time:.2f} fps)"
)

durations = []
for i in range(2):
tt_input_tensor = ttnn.from_torch(torch_input_tensor, ttnn.bfloat16)
start = time.time()
ttnn_output = ttnn_model(device, tt_input_tensor)
end = time.time()
durations.append(end - start)
enable_persistent_kernel_cache()

inference_and_compile_time, inference_time, *_ = durations
expected_compile_time = get_expected_compile_time_sec()
expected_inference_time = get_expected_inference_time_sec()

expected_compile_time, expected_inference_time = get_expected_times()
prep_perf_report(
model_name="yolov4",
batch_size=batch_size,
Expand Down

0 comments on commit a4e579d

Please sign in to comment.