From 1ea4e46ca8b88db31471e9a4ff2a4988aa4fecd9 Mon Sep 17 00:00:00 2001
From: mtairum <mtairum@tenstorrent.com>
Date: Tue, 29 Oct 2024 12:20:19 +0000
Subject: [PATCH] #13368: Update llama3 model test quick PCC

---
 models/demos/llama3/tests/test_llama_model.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/models/demos/llama3/tests/test_llama_model.py b/models/demos/llama3/tests/test_llama_model.py
index 0edb2abe1f7a..c2bbbd387e8a 100644
--- a/models/demos/llama3/tests/test_llama_model.py
+++ b/models/demos/llama3/tests/test_llama_model.py
@@ -54,8 +54,7 @@ def test_llama_model_inference(mesh_device, weights, layers, use_program_cache,
     mesh_device.enable_async(True)
 
     # This sets the minimum PCC for each iteration
-    # TODO: In the full model test, iterations 4 and 8 have lower PCCs of 0.9077 and 0.9593 respectively.
-    pcc = 0.94
+    pcc = 0.88 if layers == 1 else 0.94  # TODO: the quick (1-layer) model test may hit a lower PCC on one iteration
 
     instruct = True if weights == "instruct" else False
     dummy_weights = True if weights == "random" else False
@@ -72,24 +71,24 @@ def test_llama_model_inference(mesh_device, weights, layers, use_program_cache,
     final_model_pcc = {
         "llama32_1b": 0.9991,
         "llama32_3b": 0.9989,
-        "llama31_8b": 0.9976,
+        "llama31_8b": 0.99899,
         "llama32_11b": 0.9976,
-        "llama31_70b": 1.0,  # TBD,
+        "llama31_70b": 1.0,  # TODO,
     }[model_name]
 
     final_k_cache_pcc = {
         "llama32_1b": 0.9998,
         "llama32_3b": 0.9998,
-        "llama31_8b": 0.9995,
+        "llama31_8b": 0.99986,
         "llama32_11b": 0.9995,
-        "llama31_70b": 1.0,  # TBD,
+        "llama31_70b": 1.0,  # TODO,
     }[model_name]
     final_v_cache_pcc = {
         "llama32_1b": 0.9996,
         "llama32_3b": 0.9998,
-        "llama31_8b": 0.9996,
+        "llama31_8b": 0.99986,
         "llama32_11b": 0.9996,
-        "llama31_70b": 1.0,  # TBD,
+        "llama31_70b": 1.0,  # TODO,
     }[model_name]
     quick_iterations = {"llama32_1b": 2, "llama32_3b": 4, "llama31_8b": 6, "llama32_11b": 6, "llama31_70b": 6}[
         model_name