From 1ea4e46ca8b88db31471e9a4ff2a4988aa4fecd9 Mon Sep 17 00:00:00 2001 From: mtairum Date: Tue, 29 Oct 2024 12:20:19 +0000 Subject: [PATCH] #13368: Update llama3 model test quick PCC --- models/demos/llama3/tests/test_llama_model.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/models/demos/llama3/tests/test_llama_model.py b/models/demos/llama3/tests/test_llama_model.py index 0edb2abe1f7a..c2bbbd387e8a 100644 --- a/models/demos/llama3/tests/test_llama_model.py +++ b/models/demos/llama3/tests/test_llama_model.py @@ -54,8 +54,7 @@ def test_llama_model_inference(mesh_device, weights, layers, use_program_cache, mesh_device.enable_async(True) # This sets the minimum PCC for each iteration - # TODO: In the full model test, iterations 4 and 8 have lower PCCs of 0.9077 and 0.9593 respectively. - pcc = 0.94 + pcc = 0.88 if layers == 1 else 0.94 # TODO For model test quick (1 layer) one iteration might get a worse PCC instruct = True if weights == "instruct" else False dummy_weights = True if weights == "random" else False @@ -72,24 +71,24 @@ def test_llama_model_inference(mesh_device, weights, layers, use_program_cache, final_model_pcc = { "llama32_1b": 0.9991, "llama32_3b": 0.9989, - "llama31_8b": 0.9976, + "llama31_8b": 0.99899, "llama32_11b": 0.9976, - "llama31_70b": 1.0, # TBD, + "llama31_70b": 1.0, # TODO, }[model_name] final_k_cache_pcc = { "llama32_1b": 0.9998, "llama32_3b": 0.9998, - "llama31_8b": 0.9995, + "llama31_8b": 0.99986, "llama32_11b": 0.9995, - "llama31_70b": 1.0, # TBD, + "llama31_70b": 1.0, # TODO, }[model_name] final_v_cache_pcc = { "llama32_1b": 0.9996, "llama32_3b": 0.9998, - "llama31_8b": 0.9996, + "llama31_8b": 0.99986, "llama32_11b": 0.9996, - "llama31_70b": 1.0, # TBD, + "llama31_70b": 1.0, # TODO, }[model_name] quick_iterations = {"llama32_1b": 2, "llama32_3b": 4, "llama31_8b": 6, "llama32_11b": 6, "llama31_70b": 6}[ model_name