diff --git a/models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py b/models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py
index 37874916e8f..7aafeea00f0 100644
--- a/models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py
+++ b/models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py
@@ -98,18 +98,18 @@ def test_FalconCausalLM_prefill_end_to_end_t3000_ci_loops_10(
     if data_type == "BFLOAT8_B":
         if seq_len == 32:
             out_pcc = 0.983
-            k_cache_pcc = 0.985
-            v_cache_pcc = 0.957
+            k_cache_pcc = 0.982
+            v_cache_pcc = 0.949
             token_pcc = 0.99
         elif seq_len == 128:
             out_pcc = 0.990
-            k_cache_pcc = 0.990
-            v_cache_pcc = 0.963
+            k_cache_pcc = 0.989
+            v_cache_pcc = 0.950
             token_pcc = 0.99
         elif seq_len == 2048:
             out_pcc = 0.993
-            k_cache_pcc = 0.992
-            v_cache_pcc = 0.979
+            k_cache_pcc = 0.991
+            v_cache_pcc = 0.972
             token_pcc = 0.99
     elif data_type == "BFLOAT16":
         if seq_len == 32:
diff --git a/models/demos/t3000/falcon40b/tests/test_falcon_end_to_end.py b/models/demos/t3000/falcon40b/tests/test_falcon_end_to_end.py
index a8da9772d48..e950e39edd6 100644
--- a/models/demos/t3000/falcon40b/tests/test_falcon_end_to_end.py
+++ b/models/demos/t3000/falcon40b/tests/test_falcon_end_to_end.py
@@ -474,18 +474,18 @@ def test_FalconCausalLM_end_to_end_with_program_cache(
     if data_type == "BFLOAT8_B":
         if seq_len == 32:
             out_pcc = 0.983
-            k_cache_pcc = 0.985
-            v_cache_pcc = 0.957
+            k_cache_pcc = 0.982
+            v_cache_pcc = 0.949
             token_pcc = 0.99
         elif seq_len == 128:
             out_pcc = 0.990
-            k_cache_pcc = 0.990
-            v_cache_pcc = 0.963
+            k_cache_pcc = 0.989
+            v_cache_pcc = 0.950
             token_pcc = 0.99
         elif seq_len == 2048:
             out_pcc = 0.993
-            k_cache_pcc = 0.992
-            v_cache_pcc = 0.979
+            k_cache_pcc = 0.991
+            v_cache_pcc = 0.972
             token_pcc = 0.99
     elif data_type == "BFLOAT16":
         if seq_len == 32:
diff --git a/models/demos/t3000/falcon40b/tt/model_config.py b/models/demos/t3000/falcon40b/tt/model_config.py
index 4924f30a104..32a88815de5 100644
--- a/models/demos/t3000/falcon40b/tt/model_config.py
+++ b/models/demos/t3000/falcon40b/tt/model_config.py
@@ -719,9 +719,6 @@ def get_prefill_model_config(model_config_str, input_shape, num_devices):
     model_config["ATTN_MASK_DTYPE"] = BFP4_DTYPE
 
-    model_config["LN_INPUT_DTYPE"] = BFLOAT16_DTYPE
-    model_config["LN_MLP_OUTPUT_DTYPE"] = BFLOAT16_DTYPE
-    model_config["ATTENTION_DTYPE"] = BFLOAT16_DTYPE  # used for SDPA
 
     model_config["WORD_EMBEDDING_OUTPUT_DTYPE"] = BFLOAT16_DTYPE  # embeddings output and the residual stream
 
     # Set input df for AllGathers to bfp8 to save data bandwidth