Skip to content

Commit

Permalink
Fix yolo4 e2e perf measurement
Browse files Browse the repository at this point in the history
  • Loading branch information
Pavle Josipovic committed Dec 15, 2024
1 parent 201eff7 commit a4e579d
Showing 1 changed file with 43 additions and 13 deletions.
56 changes: 43 additions & 13 deletions models/demos/yolov4/tests/test_perf_yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,17 @@
from models.perf.perf_utils import prep_perf_report
from models.perf.device_perf_utils import run_device_perf, check_device_perf, prep_device_perf_report
from models.utility_functions import is_grayskull
from models.utility_functions import (
profiler,
)


def get_expected_compile_time_sec():
    """Return the expected model compile-time budget, in seconds."""
    expected_compile_seconds = 26
    return expected_compile_seconds

def get_expected_times():
    """Return the expected (compile_time_s, inference_time_s) pair."""
    compile_time_s = 40
    inference_time_s = 16.2
    return compile_time_s, inference_time_s

def get_expected_inference_time_sec():
    """Return the expected per-run inference-time budget, in seconds."""
    expected_inference_seconds = 0.23
    return expected_inference_seconds


@pytest.mark.models_performance_bare_metal
Expand All @@ -40,6 +47,7 @@ def test_yolov4(
model_location_generator,
):
disable_persistent_kernel_cache()
profiler.clear()
model_path = model_location_generator("models", model_subdir="Yolo")
batch_size = input_shape[0]

Expand All @@ -55,19 +63,41 @@ def test_yolov4(
ttnn_model = TtYOLOv4(weights_pth)

torch_input_tensor = torch.rand(input_shape, dtype=torch.bfloat16)
ttnn_input = ttnn.from_torch(torch_input_tensor, ttnn.bfloat16)

logger.info(f"Compiling model with warmup run")
profiler.start(f"inference_and_compile_time")
out1, out2, out3 = ttnn_model(device, ttnn_input)
profiler.end(f"inference_and_compile_time")

inference_and_compile_time = profiler.get("inference_and_compile_time")
logger.info(f"Model compiled with warmup run in {(inference_and_compile_time):.2f} s")

iterations = 16
outputs = []
logger.info(f"Running inference for {iterations} iterations")
for idx in range(iterations):
profiler.start("inference_time")
profiler.start(f"inference_time_{idx}")
out1, out2, out3 = ttnn_model(device, ttnn_input)
outputs.append(ttnn.from_device(out1, blocking=False))
outputs.append(ttnn.from_device(out2, blocking=False))
outputs.append(ttnn.from_device(out3, blocking=False))
profiler.end(f"inference_time_{idx}")
profiler.end("inference_time")

mean_inference_time = profiler.get("inference_time")
inference_time = profiler.get(f"inference_time_{iterations - 1}")
compile_time = inference_and_compile_time - inference_time
logger.info(f"Model compilation took {compile_time:.1f} s")
logger.info(f"Inference time on last iterations was completed in {(inference_time * 1000.0):.2f} ms")
logger.info(
f"Mean inference time for {batch_size} (batch) images was {(mean_inference_time * 1000.0):.2f} ms ({batch_size / mean_inference_time:.2f} fps)"
)

durations = []
for i in range(2):
tt_input_tensor = ttnn.from_torch(torch_input_tensor, ttnn.bfloat16)
start = time.time()
ttnn_output = ttnn_model(device, tt_input_tensor)
end = time.time()
durations.append(end - start)
enable_persistent_kernel_cache()

inference_and_compile_time, inference_time, *_ = durations
expected_compile_time = get_expected_compile_time_sec()
expected_inference_time = get_expected_inference_time_sec()

expected_compile_time, expected_inference_time = get_expected_times()
prep_perf_report(
model_name="yolov4",
batch_size=batch_size,
Expand Down

0 comments on commit a4e579d

Please sign in to comment.