#13368: Update llama3 model test quick PCC
mtairum committed Oct 29, 2024
1 parent e99901a commit 1ea4e46
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions models/demos/llama3/tests/test_llama_model.py
@@ -54,8 +54,7 @@ def test_llama_model_inference(mesh_device, weights, layers, use_program_cache,
mesh_device.enable_async(True)

# This sets the minimum PCC for each iteration
- # TODO: In the full model test, iterations 4 and 8 have lower PCCs of 0.9077 and 0.9593 respectively.
- pcc = 0.94
+ pcc = 0.88 if layers == 1 else 0.94 # TODO For model test quick (1 layer) one iteration might get a worse PCC

instruct = True if weights == "instruct" else False
dummy_weights = True if weights == "random" else False
@@ -72,24 +71,24 @@ def test_llama_model_inference(mesh_device, weights, layers, use_program_cache,
final_model_pcc = {
"llama32_1b": 0.9991,
"llama32_3b": 0.9989,
"llama31_8b": 0.9976,
"llama31_8b": 0.99899,
"llama32_11b": 0.9976,
"llama31_70b": 1.0, # TBD,
"llama31_70b": 1.0, # TODO,
}[model_name]

final_k_cache_pcc = {
"llama32_1b": 0.9998,
"llama32_3b": 0.9998,
"llama31_8b": 0.9995,
"llama31_8b": 0.99986,
"llama32_11b": 0.9995,
"llama31_70b": 1.0, # TBD,
"llama31_70b": 1.0, # TODO,
}[model_name]
final_v_cache_pcc = {
"llama32_1b": 0.9996,
"llama32_3b": 0.9998,
"llama31_8b": 0.9996,
"llama31_8b": 0.99986,
"llama32_11b": 0.9996,
"llama31_70b": 1.0, # TBD,
"llama31_70b": 1.0, # TODO,
}[model_name]
quick_iterations = {"llama32_1b": 2, "llama32_3b": 4, "llama31_8b": 6, "llama32_11b": 6, "llama31_70b": 6}[
model_name
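Note: PCC here is the Pearson correlation coefficient between reference (host) outputs and device outputs, and the thresholds in this diff are per-model minimums the test asserts against. Below is a minimal sketch of how such a threshold check might look; `comp_pcc`, `reference_output`, and `device_output` are hypothetical names for illustration, not the repository's actual helpers.

```python
import torch


def comp_pcc(expected: torch.Tensor, actual: torch.Tensor) -> float:
    """Pearson correlation coefficient between two tensors, flattened (illustrative sketch)."""
    x = expected.flatten().double()
    y = actual.flatten().double()
    x = x - x.mean()
    y = y - y.mean()
    # Guard against zero-variance tensors to avoid division by zero.
    denom = x.norm() * y.norm()
    if denom == 0:
        return 0.0
    return float(x @ y / denom)


if __name__ == "__main__":
    # Hypothetical usage mirroring the per-iteration minimum PCC in this commit.
    reference_output = torch.randn(32, 128)
    device_output = reference_output + 0.01 * torch.randn(32, 128)
    pcc_threshold = 0.94  # e.g. the full-model per-iteration minimum above
    pcc_value = comp_pcc(reference_output, device_output)
    assert pcc_value >= pcc_threshold, f"PCC {pcc_value:.4f} below {pcc_threshold}"
```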
