#0: Fix ttl.add to ttnn.add conversion for falcon40b
johanna-rock-tt committed Jun 18, 2024
1 parent 3fe8e14 commit 142c8fe
Showing 2 changed files with 15 additions and 15 deletions.
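
For readers scanning the diffs below: every call site keeps the same structure, and only the operation name plus two keyword arguments change. A minimal sketch of the mapping, where `a`, `b`, and `mem_cfg` are hypothetical placeholders rather than names from the commit:

```python
import ttnn

# Old call, removed by this commit:
#   out = ttnn.experimental.operations.primary.add(
#       a, b, output_mem_config=mem_cfg, in_place=True)

# New call: output_mem_config becomes memory_config, and passing
# output_tensor=a reproduces the old in_place=True by writing the sum into a.
out = ttnn.add(a, b, memory_config=mem_cfg, output_tensor=a)
```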
@@ -388,13 +388,13 @@ def test_falcon7b_attnention_sliced(
             ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR,
         )
 
-        mm_slice = ttnn.experimental.operations.primary.add(
+        mm_slice = ttnn.add(
             mm_slice,
             attn_mask_slice,
             fused_activations=None,
-            output_mem_config=height_sharded_memory_config,
+            memory_config=height_sharded_memory_config,
             output_dtype=ttnn.experimental.tensor.DataType.BFLOAT16,
-            in_place=True,
+            output_tensor=mm_slice,
         )
 
         attn_mask_slice.deallocate()
models/demos/t3000/falcon40b/tt/falcon_decoder.py (24 changes: 12 additions & 12 deletions)
@@ -304,11 +304,11 @@ def fwd_prefill(
         # Note that this is only correct in inference when dropout is disabled
         for i in range(len(residual)):
             output.append(
-                ttnn.experimental.operations.primary.add(
+                ttnn.add(
                     residual[i],
                     attention_output[i],
-                    output_mem_config=self.model_config["PARALLEL_ATTN_ADD_OUTPUT_MEMCFG"],
-                    in_place=True,
+                    memory_config=self.model_config["PARALLEL_ATTN_ADD_OUTPUT_MEMCFG"],
+                    output_tensor=residual[i],
                 )
             )
             attention_output[i].deallocate(True)
@@ -320,11 +320,11 @@ def fwd_prefill(
         # dropout_add
         # For inference, this is just add
         for i in range(len(output)):
-            output[i] = ttnn.experimental.operations.primary.add(
+            output[i] = ttnn.add(
                 output[i],
                 mlp_output[i],
-                output_mem_config=self.model_config["DROPOUT_ADD_OUTPUT_MEMCFG"],
-                in_place=True,
+                memory_config=self.model_config["DROPOUT_ADD_OUTPUT_MEMCFG"],
+                output_tensor=output[i],
             )
 
             mlp_output[i].deallocate(True)
@@ -421,11 +421,11 @@ def fwd_decode(
         # Note that this is only correct in inference when dropout is disabled
         for i in range(len(residual)):
             output.append(
-                ttnn.experimental.operations.primary.add(
+                ttnn.add(
                     residual[i],
                     attention_output[i],
-                    output_mem_config=self.model_config["PARALLEL_ATTN_ADD_OUTPUT_MEMCFG"],
-                    in_place=True,
+                    memory_config=self.model_config["PARALLEL_ATTN_ADD_OUTPUT_MEMCFG"],
+                    output_tensor=residual[i],
                 )
             )
             attention_output[i].deallocate(True)
@@ -437,11 +437,11 @@ def fwd_decode(
         # dropout_add
         # For inference, this is just add
         for i in range(len(output)):
-            output[i] = ttnn.experimental.operations.primary.add(
+            output[i] = ttnn.add(
                 output[i],
                 mlp_output[i],
-                output_mem_config=self.model_config["DROPOUT_ADD_OUTPUT_MEMCFG"],
-                in_place=True,
+                memory_config=self.model_config["DROPOUT_ADD_OUTPUT_MEMCFG"],
+                output_tensor=output[i],
             )
 
             mlp_output[i].deallocate(True)
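
Not part of the commit, but a useful companion: a self-contained sanity check that the new `ttnn.add` call with `output_tensor` reproduces a plain element-wise add. The helper name, tensor shape, and tolerance are illustrative assumptions; running it requires a Tenstorrent device plus the `ttnn` and `torch` packages.

```python
import torch
import ttnn


def check_inplace_add(shape=(1, 1, 32, 32), atol=4e-2):
    """Illustrative check: ttnn.add with output_tensor matches a torch add."""
    device = ttnn.open_device(device_id=0)
    try:
        torch_a = torch.rand(shape, dtype=torch.bfloat16)
        torch_b = torch.rand(shape, dtype=torch.bfloat16)

        a = ttnn.from_torch(torch_a, layout=ttnn.TILE_LAYOUT, device=device)
        b = ttnn.from_torch(torch_b, layout=ttnn.TILE_LAYOUT, device=device)

        # memory_config replaces the old output_mem_config; output_tensor=a
        # replaces in_place=True by writing the result back into `a`.
        a = ttnn.add(a, b, memory_config=ttnn.DRAM_MEMORY_CONFIG, output_tensor=a)

        assert torch.allclose(ttnn.to_torch(a), torch_a + torch_b, atol=atol)
    finally:
        ttnn.close_device(device)


if __name__ == "__main__":
    check_inplace_add()
```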
