From 142c8fe14bc4ff52e7a8bfa44ced4f3213091f81 Mon Sep 17 00:00:00 2001
From: Johanna Rock
Date: Tue, 18 Jun 2024 09:16:08 +0000
Subject: [PATCH] #0: Fix ttl.add to ttnn.add conversion for falcon40b

---
 ...n_matmuls_and_bmms_with_mixed_precision.py |  6 ++---
 .../t3000/falcon40b/tt/falcon_decoder.py      | 24 +++++++++----------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py b/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py
index b31fd51b723..29c6218b64f 100644
--- a/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py
+++ b/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py
@@ -388,13 +388,13 @@ def test_falcon7b_attnention_sliced(
             ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR,
         )
 
-        mm_slice = ttnn.experimental.operations.primary.add(
+        mm_slice = ttnn.add(
             mm_slice,
             attn_mask_slice,
             fused_activations=None,
-            output_mem_config=height_sharded_memory_config,
+            memory_config=height_sharded_memory_config,
             output_dtype=ttnn.experimental.tensor.DataType.BFLOAT16,
-            in_place=True,
+            output_tensor=mm_slice,
         )
 
         attn_mask_slice.deallocate()
diff --git a/models/demos/t3000/falcon40b/tt/falcon_decoder.py b/models/demos/t3000/falcon40b/tt/falcon_decoder.py
index 0d19dd25e13..9de02f5cd34 100644
--- a/models/demos/t3000/falcon40b/tt/falcon_decoder.py
+++ b/models/demos/t3000/falcon40b/tt/falcon_decoder.py
@@ -304,11 +304,11 @@ def fwd_prefill(
         # Note that this is only correct in inference when dropout is disabled
         for i in range(len(residual)):
             output.append(
-                ttnn.experimental.operations.primary.add(
+                ttnn.add(
                     residual[i],
                     attention_output[i],
-                    output_mem_config=self.model_config["PARALLEL_ATTN_ADD_OUTPUT_MEMCFG"],
-                    in_place=True,
+                    memory_config=self.model_config["PARALLEL_ATTN_ADD_OUTPUT_MEMCFG"],
+                    output_tensor=residual[i],
                 )
             )
             attention_output[i].deallocate(True)
@@ -320,11 +320,11 @@ def fwd_prefill(
         # dropout_add
         # For inference, this is just add
         for i in range(len(output)):
-            output[i] = ttnn.experimental.operations.primary.add(
+            output[i] = ttnn.add(
                 output[i],
                 mlp_output[i],
-                output_mem_config=self.model_config["DROPOUT_ADD_OUTPUT_MEMCFG"],
-                in_place=True,
+                memory_config=self.model_config["DROPOUT_ADD_OUTPUT_MEMCFG"],
+                output_tensor=output[i],
             )
 
             mlp_output[i].deallocate(True)
@@ -421,11 +421,11 @@ def fwd_decode(
         # Note that this is only correct in inference when dropout is disabled
         for i in range(len(residual)):
             output.append(
-                ttnn.experimental.operations.primary.add(
+                ttnn.add(
                     residual[i],
                     attention_output[i],
-                    output_mem_config=self.model_config["PARALLEL_ATTN_ADD_OUTPUT_MEMCFG"],
-                    in_place=True,
+                    memory_config=self.model_config["PARALLEL_ATTN_ADD_OUTPUT_MEMCFG"],
+                    output_tensor=residual[i],
                 )
             )
             attention_output[i].deallocate(True)
@@ -437,11 +437,11 @@ def fwd_decode(
         # dropout_add
         # For inference, this is just add
         for i in range(len(output)):
-            output[i] = ttnn.experimental.operations.primary.add(
+            output[i] = ttnn.add(
                 output[i],
                 mlp_output[i],
-                output_mem_config=self.model_config["DROPOUT_ADD_OUTPUT_MEMCFG"],
-                in_place=True,
+                memory_config=self.model_config["DROPOUT_ADD_OUTPUT_MEMCFG"],
+                output_tensor=output[i],
             )
 
             mlp_output[i].deallocate(True)