From 334e96392227e6be2637b9683478b058d63228c8 Mon Sep 17 00:00:00 2001 From: Borys Bradel <164946524+bbradelTT@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:41:40 -0400 Subject: [PATCH] #9492: Update parameter names for linear in test_experimental.py (#9627) * #9492: Update parameter names for linear in test_experimental.py * #9492: Add import ttnn when convert matmul program config to ttnn --- .../bert/tt/ttnn_optimized_sharded_bert.py | 12 +-- ...n_matmuls_and_bmms_with_mixed_precision.py | 8 +- models/demos/falcon7b/tt/falcon_lm_head.py | 2 +- models/demos/falcon7b/tt/model_config.py | 14 ++- .../metal_BERT_large_11/tt/custom_matmuls.py | 12 +-- .../metal_BERT_large_11/tt/model_config.py | 23 ++-- .../demos/resnet/tests/test_resnet50_conv.py | 65 +++++------ models/demos/resnet/tt/metalResnetBlock50.py | 102 +++++++++--------- .../tests/test_falcon_model_single_chip.py | 10 +- .../demos/t3000/falcon40b/tt/model_config.py | 28 ++--- .../demos/t3000/falcon40b/tt/model_utils.py | 4 +- .../llama2_70b/scripts/model_config_n150.py | 49 ++++----- .../t3000/llama2_70b/tests/perf/ivan_ff.py | 3 +- .../t3000/llama2_70b/tests/perf/test_bmm.py | 3 +- .../t3000/llama2_70b/tests/perf/test_ff1.py | 3 +- .../tests/perf/test_llama_matmul_perf.py | 16 +-- .../demos/t3000/llama2_70b/tt/model_config.py | 67 +++++------- .../t3000/mixtral8x7b/tt/model_config.py | 36 ++----- .../mistral7b/tt/mistral_attention.py | 10 +- .../tt2/ttnn_functional_cross_attention.py | 26 ++--- .../tt2/ttnn_functional_feedforward.py | 2 +- .../tt2/ttnn_functional_geglu.py | 4 +- .../tt2/ttnn_functional_resnetblock2d.py | 2 +- .../ttnn_functional_resnetblock2d_new_conv.py | 2 +- .../tests/test_reproduce_nd_matmul.py | 5 +- .../tt/ttnn_optimized_sharded_vit.py | 16 +-- .../tt/ttnn_optimized_sharded_vit_backup.py | 16 +-- .../llama2_70b/scripts/model_config_n150.py | 49 ++++----- .../llama2_70b/tests/perf/ivan_ff.py | 3 +- .../llama2_70b/tests/perf/test_bmm.py | 3 +- .../llama2_70b/tests/perf/test_ff1.py | 3 +- .../tests/perf/test_llama_matmul_perf.py | 17 +-- .../tests/unit_tests/test_rotary_matmul.py | 8 +- .../llama2_70b/tt/model_config.py | 35 +++--- .../resnet/tt/ttnn_functional_resnet50.py | 6 +- .../ttnn_functional_resnet50_new_conv_api.py | 6 +- 36 files changed, 314 insertions(+), 356 deletions(-) diff --git a/models/demos/bert/tt/ttnn_optimized_sharded_bert.py b/models/demos/bert/tt/ttnn_optimized_sharded_bert.py index c2543d97abe..560deab0043 100644 --- a/models/demos/bert/tt/ttnn_optimized_sharded_bert.py +++ b/models/demos/bert/tt/ttnn_optimized_sharded_bert.py @@ -20,7 +20,7 @@ def update_model_config(config, batch_size): core_grid = ttnn.CoreGrid(y=8, x=batch_size) program_configs = { - "query_key_value_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "query_key_value_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=4, out_subblock_h=1, @@ -30,7 +30,7 @@ def update_model_config(config, batch_size): transpose_mcast=True, fused_activation=None, ), - "query_by_key_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + "query_by_key_matmul_program_config": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, @@ -38,7 +38,7 @@ def update_model_config(config, batch_size): per_core_M=24, per_core_N=12, ), - 
"attention_probabilities_by_value_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + "attention_probabilities_by_value_matmul_program_config": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=12, out_subblock_h=4, @@ -46,7 +46,7 @@ def update_model_config(config, batch_size): per_core_M=24, per_core_N=2, ), - "self_output_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "self_output_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=4, out_subblock_h=2, @@ -56,7 +56,7 @@ def update_model_config(config, batch_size): transpose_mcast=True, fused_activation=None, ), - "ff1_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "ff1_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=4, out_subblock_h=1, @@ -66,7 +66,7 @@ def update_model_config(config, batch_size): transpose_mcast=True, fused_activation=(ttnn.experimental.tensor.FusibleActivation.GELU, True), ), - "ff2_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "ff2_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=16, out_subblock_h=2, diff --git a/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py b/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py index a11f5f84c51..0e36ca763d8 100644 --- a/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py +++ b/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py @@ -344,7 +344,7 @@ def test_falcon7b_attnention_sliced( subblock_w = 1 if seq_len == 2048: subblock_w = 8 # best option - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=2, per_core_M=tiles_per_shard, @@ -415,7 +415,7 @@ def test_falcon7b_attnention_sliced( subblock_w = 2 subblock_h = 1 - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=seq_len // 32, per_core_M=tiles_per_shard, @@ -641,7 +641,7 @@ def test_falcon7b_attention_softmax_sequence( subblock_w = 1 if seq_len == 2048: subblock_w = 8 # best option - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=2, per_core_M=tiles_per_shard, @@ -686,7 +686,7 @@ def test_falcon7b_attention_softmax_sequence( subblock_w = 2 subblock_h = 1 - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=seq_len // 32, per_core_M=tiles_per_shard, diff --git a/models/demos/falcon7b/tt/falcon_lm_head.py 
b/models/demos/falcon7b/tt/falcon_lm_head.py index a9492f872cb..4add4c53a3a 100644 --- a/models/demos/falcon7b/tt/falcon_lm_head.py +++ b/models/demos/falcon7b/tt/falcon_lm_head.py @@ -61,7 +61,7 @@ def falcon_lm_head_matmul_2d( per_core_N = nearest_y(weights_n_in_tiles / grid.x, out_subblock_w) in0_block_w = 4 if seq_len <= 1024 else 8 - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid, in0_block_w=in0_block_w, out_subblock_h=out_subblock_h, diff --git a/models/demos/falcon7b/tt/model_config.py b/models/demos/falcon7b/tt/model_config.py index ea528d3f5d5..46ec97f9f6f 100644 --- a/models/demos/falcon7b/tt/model_config.py +++ b/models/demos/falcon7b/tt/model_config.py @@ -267,7 +267,7 @@ def get_model_config(model_config_str, prefill_seq_len=0, decode_batch_size=32): model_config[ "ATTN_BATCHED_MM_PROGCFG" - ] = lambda block_w, per_core_M, per_core_N: ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + ] = lambda block_w, per_core_M, per_core_N: ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=[8, 4], in0_block_w=block_w, out_subblock_h=1, # TODO: Maximize @@ -326,7 +326,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg): ) model_config["MLP_KERNEL_CONFIG"] = default_kernel_config - mm_h_to_4h_prog_cfg = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + mm_h_to_4h_prog_cfg = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=model_config["MLP_GRID_SIZE"], in0_block_w=3, out_subblock_h=1, @@ -338,7 +338,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg): ) model_config["DENSE_H_TO_4H_MM_PROGCFG"] = mm_h_to_4h_prog_cfg - mm_4h_to_h_prog_cfg = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + mm_4h_to_h_prog_cfg = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=model_config["MLP_GRID_SIZE"], in0_block_w=8, out_subblock_h=1, @@ -352,9 +352,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg): model_config["MLP_INTERLEAVED_TO_SHARDED_MEM_CFG"] = dram_memcfg model_config["FUSED_QKV_MM_OPTIMIZED_MEMCFG"] = dram_memcfg - model_config[ - "FUSED_QKV_MM_OPTIMIZED_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + model_config["FUSED_QKV_MM_OPTIMIZED_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=2, per_core_M=8, @@ -380,7 +378,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg): model_config[ "QKT_OPTIMIZED_PROGCFG" - ] = lambda tiles_per_shard, seq_len, subblock_h, subblock_w: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + ] = lambda tiles_per_shard, seq_len, subblock_h, subblock_w: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=model_config["ATTN_OPTIMIZED_GRID_SIZE"], in0_block_w=2, per_core_M=tiles_per_shard, @@ -414,7 +412,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg): model_config[ "QKTV_MM_OPTIMIZED_PROGCFG" - ] = lambda tiles_per_shard, seq_len, subblock_h: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + ] = lambda tiles_per_shard, seq_len, subblock_h: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=model_config["ATTN_OPTIMIZED_GRID_SIZE"], 
in0_block_w=seq_len // 32, per_core_M=tiles_per_shard, diff --git a/models/demos/metal_BERT_large_11/tt/custom_matmuls.py b/models/demos/metal_BERT_large_11/tt/custom_matmuls.py index 5d4e411a593..55fea61d703 100644 --- a/models/demos/metal_BERT_large_11/tt/custom_matmuls.py +++ b/models/demos/metal_BERT_large_11/tt/custom_matmuls.py @@ -14,7 +14,7 @@ def bert_large_fused_qkv_matmul( assert input_tensor_a.get_legacy_shape() == [batch_size, 1, 384, 1024], "Unsupported input shape" assert input_tensor_b.get_legacy_shape() == [1, 1, 1024, 3072], "Unsupported input shape" - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, batch_size), in0_block_w=4, out_subblock_h=4, @@ -59,7 +59,7 @@ def bert_large_ff1_matmul( assert input_tensor_a.get_legacy_shape() == [batch_size, 1, 384, 1024], "Unsupported input shape" assert input_tensor_b.get_legacy_shape() == [1, 1, 1024, 4096], "Unsupported input shape" - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, batch_size), in0_block_w=4, out_subblock_h=6, @@ -87,7 +87,7 @@ def bert_large_ff2_matmul( assert input_tensor_a.get_legacy_shape() == [batch_size, 1, 384, 4096], "Unsupported input shape" assert input_tensor_b.get_legacy_shape() == [1, 1, 4096, 1024], "Unsupported input shape" - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, batch_size), in0_block_w=4, out_subblock_h=6, @@ -115,7 +115,7 @@ def bert_large_selfout_matmul( assert input_tensor_a.get_legacy_shape() == [batch_size, 1, 384, 1024], "Unsupported input shape" assert input_tensor_b.get_legacy_shape() == [1, 1, 1024, 1024], "Unsupported input shape" - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, batch_size), in0_block_w=4, out_subblock_h=6, @@ -143,7 +143,7 @@ def bert_large_pre_softmax_bmm( assert input_tensor_a.get_legacy_shape() == [batch_size, 16, 384, 64], "Unsupported input shape" assert input_tensor_b.get_legacy_shape() == [batch_size, 16, 64, 384], "Unsupported input shape" - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(12, batch_size), in0_block_w=1, out_subblock_h=4, @@ -168,7 +168,7 @@ def bert_large_post_softmax_bmm( assert input_tensor_a.get_legacy_shape() == [batch_size, 16, 384, 384], "Unsupported input shape" assert input_tensor_b.get_legacy_shape() == [batch_size, 16, 384, 64], "Unsupported input shape" - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(12, batch_size), in0_block_w=2, out_subblock_h=4, diff --git a/models/demos/metal_BERT_large_11/tt/model_config.py b/models/demos/metal_BERT_large_11/tt/model_config.py index 05b10f9d1eb..569b71e519c 100644 --- a/models/demos/metal_BERT_large_11/tt/model_config.py +++ b/models/demos/metal_BERT_large_11/tt/model_config.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: 
Apache-2.0 import tt_lib +import ttnn from loguru import logger from pathlib import Path from models.utility_functions import is_wormhole_b0 @@ -197,7 +198,7 @@ def get_model_config(batch, device_grid_size, model_config_str): elif model_config_str == "BFLOAT8_B-L1" or model_config_str == "BFLOAT8_B-DRAM": grid_size = [12, batch] new_config_values = { - "OP3_PRE_SOFTMAX_BMM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig( + "OP3_PRE_SOFTMAX_BMM_CONFIG": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=2, out_subblock_h=1, @@ -205,7 +206,7 @@ def get_model_config(batch, device_grid_size, model_config_str): per_core_M=12, per_core_N=12, ), - "OP5_POST_SOFTMAX_BMM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig( + "OP5_POST_SOFTMAX_BMM_CONFIG": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=12, out_subblock_h=4, @@ -213,7 +214,7 @@ def get_model_config(batch, device_grid_size, model_config_str): per_core_M=12, per_core_N=2, ), - "OP7_SELFOUT_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "OP7_SELFOUT_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=4, out_subblock_h=2, @@ -223,7 +224,7 @@ def get_model_config(batch, device_grid_size, model_config_str): transpose_mcast=False, fused_activation=None, ), - "OP9_FF1_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "OP9_FF1_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=4, out_subblock_h=1, @@ -233,7 +234,7 @@ def get_model_config(batch, device_grid_size, model_config_str): transpose_mcast=False, fused_activation=(tt_lib.tensor.FusibleActivation.GELU, True), ), - "OP10_FF2_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "OP10_FF2_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=16, out_subblock_h=2, @@ -326,7 +327,7 @@ def get_model_config(batch, device_grid_size, model_config_str): "OP11_LAYERNORM_GAMMA_MEMCFG": DRAM_MEMCFG, "OP11_LAYERNORM_BETA_MEMCFG": DRAM_MEMCFG, "RESERVE_SPLIT_HEADS_SHAPE": [1, 1, 1, 153 * 1024 // 2], - "OP1_FUSED_QKV_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "OP1_FUSED_QKV_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=4, out_subblock_h=1, @@ -336,7 +337,7 @@ def get_model_config(batch, device_grid_size, model_config_str): transpose_mcast=transpose_mm_mcast, fused_activation=None, ), - "OP3_PRE_SOFTMAX_BMM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig( + "OP3_PRE_SOFTMAX_BMM_CONFIG": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=2, out_subblock_h=1, @@ -344,7 +345,7 @@ def get_model_config(batch, device_grid_size, model_config_str): per_core_M=24, per_core_N=12, ), - "OP5_POST_SOFTMAX_BMM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig( + "OP5_POST_SOFTMAX_BMM_CONFIG": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=12, out_subblock_h=4, @@ -352,7 +353,7 @@ def get_model_config(batch, device_grid_size, model_config_str): per_core_M=24, per_core_N=2, ), - "OP7_SELFOUT_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + 
"OP7_SELFOUT_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=4, out_subblock_h=2, @@ -362,7 +363,7 @@ def get_model_config(batch, device_grid_size, model_config_str): transpose_mcast=transpose_mm_mcast, fused_activation=None, ), - "OP9_FF1_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "OP9_FF1_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=4, out_subblock_h=1, @@ -372,7 +373,7 @@ def get_model_config(batch, device_grid_size, model_config_str): transpose_mcast=transpose_mm_mcast, fused_activation=(tt_lib.tensor.FusibleActivation.GELU, True), ), - "OP10_FF2_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "OP10_FF2_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=16, out_subblock_h=2, diff --git a/models/demos/resnet/tests/test_resnet50_conv.py b/models/demos/resnet/tests/test_resnet50_conv.py index 505651bc4ed..27ae078d76d 100644 --- a/models/demos/resnet/tests/test_resnet50_conv.py +++ b/models/demos/resnet/tests/test_resnet50_conv.py @@ -6,6 +6,7 @@ import torch import pytest import tt_lib +import ttnn from models.utility_functions import comp_pcc from models.demos.resnet.tt.metalResnetBlock50 import ( compute_conv_output_shape, @@ -215,7 +216,7 @@ }, }, 8: { - (25088, 64, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (25088, 64, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=4, @@ -226,7 +227,7 @@ fused_activation=None, mcast_in0=False, ), - (25088, 64, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (25088, 64, 256): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=1, @@ -237,7 +238,7 @@ fused_activation=None, mcast_in0=False, ), - (25088, 256, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (25088, 256, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=4, @@ -248,7 +249,7 @@ fused_activation=None, mcast_in0=False, ), - (25088, 256, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (25088, 256, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=2, @@ -259,7 +260,7 @@ fused_activation=None, mcast_in0=False, ), - (6272, 128, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (6272, 128, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=4, out_subblock_h=1, @@ -270,7 +271,7 @@ fused_activation=None, mcast_in0=False, ), - (6272, 256, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (6272, 256, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=1, @@ -281,7 +282,7 @@ fused_activation=None, mcast_in0=False, ), - (6272, 512, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (6272, 512, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=16, out_subblock_h=2, @@ -292,7 +293,7 @@ fused_activation=None, mcast_in0=False, ), - (6272, 
512, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (6272, 512, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=2, out_subblock_h=5, @@ -302,7 +303,7 @@ transpose_mcast=True, fused_activation=None, ), - (1568, 256, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 256, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=1, out_subblock_h=1, @@ -312,7 +313,7 @@ transpose_mcast=True, fused_activation=None, ), - (1568, 1024, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 1024, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=4, out_subblock_h=5, @@ -322,7 +323,7 @@ transpose_mcast=True, fused_activation=None, ), - (1568, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=4, out_subblock_h=1, @@ -332,7 +333,7 @@ transpose_mcast=True, fused_activation=None, ), - (1568, 512, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 512, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=2, out_subblock_h=1, @@ -342,7 +343,7 @@ transpose_mcast=True, fused_activation=None, ), - (1568, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(7, 8), in0_block_w=4, out_subblock_h=1, @@ -352,7 +353,7 @@ transpose_mcast=True, fused_activation=None, ), - (416, 512, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (416, 512, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(7, 8), in0_block_w=2, out_subblock_h=1, @@ -362,7 +363,7 @@ transpose_mcast=True, fused_activation=None, ), - (416, 1024, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (416, 1024, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(7, 8), in0_block_w=4, out_subblock_h=1, @@ -372,7 +373,7 @@ transpose_mcast=True, fused_activation=None, ), - (416, 2048, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (416, 2048, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(7, 8), in0_block_w=8, out_subblock_h=2, @@ -384,7 +385,7 @@ ), }, 16: { - (50176, 64, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (50176, 64, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=4, @@ -395,7 +396,7 @@ fused_activation=None, mcast_in0=False, ), - (50176, 64, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (50176, 64, 256): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=1, @@ -406,7 +407,7 @@ fused_activation=None, mcast_in0=False, ), - (50176, 256, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (50176, 256, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=4, @@ -417,7 +418,7 @@ fused_activation=None, mcast_in0=False, 
), - (50176, 256, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (50176, 256, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=2, @@ -428,7 +429,7 @@ fused_activation=None, mcast_in0=False, ), - (12544, 128, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (12544, 128, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=4, out_subblock_h=1, @@ -439,7 +440,7 @@ fused_activation=None, mcast_in0=False, ), - (12544, 256, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (12544, 256, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=1, @@ -450,7 +451,7 @@ fused_activation=None, mcast_in0=False, ), - (12544, 512, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (12544, 512, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=16, out_subblock_h=2, @@ -461,7 +462,7 @@ fused_activation=None, mcast_in0=False, ), - (12544, 512, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (12544, 512, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=2, out_subblock_h=4, @@ -471,7 +472,7 @@ transpose_mcast=True, fused_activation=None, ), - (3136, 256, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 256, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=1, out_subblock_h=1, @@ -481,7 +482,7 @@ transpose_mcast=True, fused_activation=None, ), - (3136, 1024, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 1024, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=4, out_subblock_h=3, @@ -491,7 +492,7 @@ transpose_mcast=True, fused_activation=None, ), - (3136, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=4, out_subblock_h=3, @@ -501,7 +502,7 @@ transpose_mcast=True, fused_activation=None, ), - (3136, 512, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 512, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=2, out_subblock_h=1, @@ -511,7 +512,7 @@ transpose_mcast=True, fused_activation=None, ), - (3136, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(9, 8), in0_block_w=4, out_subblock_h=1, @@ -521,7 +522,7 @@ transpose_mcast=True, fused_activation=None, ), - (800, 512, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (800, 512, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(9, 8), in0_block_w=2, out_subblock_h=1, @@ -531,7 +532,7 @@ transpose_mcast=True, fused_activation=None, ), - (800, 1024, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (800, 1024, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(9, 8), in0_block_w=4, out_subblock_h=1, @@ 
-541,7 +542,7 @@ transpose_mcast=True, fused_activation=None, ), - (800, 2048, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (800, 2048, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(9, 8), in0_block_w=8, out_subblock_h=3, diff --git a/models/demos/resnet/tt/metalResnetBlock50.py b/models/demos/resnet/tt/metalResnetBlock50.py index 5810ff0f456..05554c4366c 100644 --- a/models/demos/resnet/tt/metalResnetBlock50.py +++ b/models/demos/resnet/tt/metalResnetBlock50.py @@ -36,7 +36,7 @@ ) hardcoded_matmul_config_linear = { - 8: tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 8: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1, @@ -47,7 +47,7 @@ fused_activation=None, mcast_in0=True, ), - 16: tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 16: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1, @@ -58,7 +58,7 @@ fused_activation=None, mcast_in0=True, ), - 20: tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 20: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1, @@ -395,7 +395,7 @@ def compute_conv_output_shape(conv_params, x_shape): }, }, 8: { - (25088, 64, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (25088, 64, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=4, @@ -406,7 +406,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (25088, 64, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (25088, 64, 256): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=1, @@ -417,7 +417,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (25088, 256, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (25088, 256, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=4, @@ -428,7 +428,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (25088, 256, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (25088, 256, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=2, @@ -439,7 +439,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (6272, 128, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (6272, 128, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=4, out_subblock_h=1, @@ -450,7 +450,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (6272, 256, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (6272, 256, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=1, @@ -461,7 +461,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (6272, 512, 128): 
tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (6272, 512, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=16, out_subblock_h=2, @@ -472,7 +472,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (6272, 512, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (6272, 512, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=2, out_subblock_h=5, @@ -482,7 +482,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (1568, 256, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 256, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=1, out_subblock_h=1, @@ -492,7 +492,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (1568, 1024, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 1024, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=4, out_subblock_h=5, @@ -502,7 +502,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (1568, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=4, out_subblock_h=1, @@ -512,7 +512,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (1568, 512, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 512, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(10, 8), in0_block_w=2, out_subblock_h=1, @@ -522,7 +522,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (1568, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (1568, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(7, 8), in0_block_w=4, out_subblock_h=1, @@ -532,7 +532,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (416, 512, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (416, 512, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(7, 8), in0_block_w=2, out_subblock_h=1, @@ -542,7 +542,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (416, 1024, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (416, 1024, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(7, 8), in0_block_w=4, out_subblock_h=1, @@ -552,7 +552,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (416, 2048, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (416, 2048, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(7, 8), in0_block_w=8, out_subblock_h=2, @@ -564,7 +564,7 @@ def compute_conv_output_shape(conv_params, x_shape): ), }, 16: { - (50176, 64, 64): 
tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (50176, 64, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=4, @@ -575,7 +575,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (50176, 64, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (50176, 64, 256): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=1, @@ -586,7 +586,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (50176, 256, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (50176, 256, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=4, @@ -597,7 +597,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (50176, 256, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (50176, 256, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=2, @@ -608,7 +608,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (12544, 128, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (12544, 128, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=4, out_subblock_h=1, @@ -619,7 +619,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (12544, 256, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (12544, 256, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=1, @@ -630,7 +630,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (12544, 512, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (12544, 512, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=16, out_subblock_h=2, @@ -641,7 +641,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (12544, 512, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (12544, 512, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=2, out_subblock_h=4, @@ -651,7 +651,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3136, 256, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 256, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=1, out_subblock_h=1, @@ -661,7 +661,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3136, 1024, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 1024, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=4, out_subblock_h=3, @@ -671,7 +671,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3136, 1024, 512): 
tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=4, out_subblock_h=3, @@ -681,7 +681,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3136, 512, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 512, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=2, out_subblock_h=1, @@ -691,7 +691,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3136, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3136, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(9, 8), in0_block_w=4, out_subblock_h=1, @@ -701,7 +701,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (800, 512, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (800, 512, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(9, 8), in0_block_w=2, out_subblock_h=1, @@ -711,7 +711,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (800, 1024, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (800, 1024, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(9, 8), in0_block_w=4, out_subblock_h=1, @@ -721,7 +721,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (800, 2048, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (800, 2048, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(9, 8), in0_block_w=8, out_subblock_h=3, @@ -733,7 +733,7 @@ def compute_conv_output_shape(conv_params, x_shape): ), }, 20: { - (62720, 64, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (62720, 64, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=4, @@ -744,7 +744,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (62720, 64, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (62720, 64, 256): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=2, out_subblock_h=1, @@ -755,7 +755,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (62720, 256, 64): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (62720, 256, 64): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=4, @@ -766,7 +766,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (62720, 256, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (62720, 256, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=2, @@ -777,7 +777,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (15680, 128, 512): 
tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (15680, 128, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=4, out_subblock_h=1, @@ -788,7 +788,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (15680, 256, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (15680, 256, 512): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=8, out_subblock_h=1, @@ -799,7 +799,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (15680, 512, 128): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + (15680, 512, 128): ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(12, 9), in0_block_w=16, out_subblock_h=1, @@ -810,7 +810,7 @@ def compute_conv_output_shape(conv_params, x_shape): fused_activation=None, mcast_in0=False, ), - (15680, 512, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (15680, 512, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, 8), in0_block_w=2, out_subblock_h=1, @@ -820,7 +820,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3936, 256, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3936, 256, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, 8), in0_block_w=1, out_subblock_h=1, @@ -830,7 +830,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3936, 1024, 256): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3936, 1024, 256): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, 8), in0_block_w=4, out_subblock_h=1, @@ -840,7 +840,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3936, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3936, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, 8), in0_block_w=4, out_subblock_h=1, @@ -850,7 +850,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3936, 512, 1024): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3936, 512, 1024): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, 8), in0_block_w=2, out_subblock_h=1, @@ -860,7 +860,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (3936, 1024, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (3936, 1024, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(12, 8), in0_block_w=4, out_subblock_h=4, @@ -870,7 +870,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (992, 512, 2048): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (992, 512, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=2, out_subblock_h=1, @@ -880,7 +880,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (992, 1024, 2048): 
tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (992, 1024, 2048): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=4, out_subblock_h=1, @@ -890,7 +890,7 @@ def compute_conv_output_shape(conv_params, x_shape): transpose_mcast=True, fused_activation=None, ), - (992, 2048, 512): tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + (992, 2048, 512): ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(11, 8), in0_block_w=8, out_subblock_h=3, diff --git a/models/demos/t3000/falcon40b/tests/test_falcon_model_single_chip.py b/models/demos/t3000/falcon40b/tests/test_falcon_model_single_chip.py index c91d8e66363..fcd981b7450 100644 --- a/models/demos/t3000/falcon40b/tests/test_falcon_model_single_chip.py +++ b/models/demos/t3000/falcon40b/tests/test_falcon_model_single_chip.py @@ -162,7 +162,7 @@ def test_sharded_matmul_1d_in0( ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, ) - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=32, out_subblock_h=1, @@ -264,7 +264,7 @@ def test_sharded_matmul_1d_in0_multi_chip( output_mem_config = sharded_mem_config if out_sharded else interleaved_mem_config if num_devices == 4: - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -276,7 +276,7 @@ def test_sharded_matmul_1d_in0_multi_chip( mcast_in0=True, ) elif num_devices == 8: - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -377,7 +377,7 @@ def test_sharded_matmul_1d_in0_multi_chip( output_mem_config = sharded_mem_config if out_sharded else interleaved_mem_config if num_devices == 4: - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -389,7 +389,7 @@ def test_sharded_matmul_1d_in0_multi_chip( mcast_in0=True, ) elif num_devices == 8: - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, diff --git a/models/demos/t3000/falcon40b/tt/model_config.py b/models/demos/t3000/falcon40b/tt/model_config.py index 05da5d7a771..8967a5d2cad 100644 --- a/models/demos/t3000/falcon40b/tt/model_config.py +++ b/models/demos/t3000/falcon40b/tt/model_config.py @@ -446,9 +446,7 @@ def get_decode_model_config(model_config_str, input_shape, num_devices): False, ), ) - model_config[ - "QKV_MM_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=32, # TODO: Can this be larger out_subblock_h=1, # TODO: Can this be larger @@ -567,9 +565,7 @@ def get_decode_model_config(model_config_str, input_shape, 
num_devices): ), ) model_config["DENSE_4H_TO_H_MM_OUTPUT_MEMCFG"] = WIDTH_SHARDED_MEMCFG - model_config[ - "SELFOUT_MM_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["SELFOUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # TODO: Can this be larger out_subblock_h=1, # TODO: Can this be larger @@ -581,9 +577,7 @@ def get_decode_model_config(model_config_str, input_shape, num_devices): mcast_in0=True, ) # MLP - model_config[ - "DENSE_H_TO_4H_MM_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["DENSE_H_TO_4H_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # TODO: Can this be larger out_subblock_h=1, # TODO: Can this be larger @@ -594,9 +588,7 @@ def get_decode_model_config(model_config_str, input_shape, num_devices): fused_activation=[ttnn.experimental.tensor.FusibleActivation.GELU, True], mcast_in0=True, ) - model_config[ - "DENSE_4H_TO_H_MM_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["DENSE_4H_TO_H_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=32, # TODO: Can this be larger out_subblock_h=1, # TODO: Can this be larger @@ -644,9 +636,7 @@ def get_decode_model_config(model_config_str, input_shape, num_devices): # LM Head model_config["LM_HEAD_MM_OUTPUT_MEMCFG"] = WIDTH_SHARDED_MEMCFG - model_config[ - "LM_HEAD_MM_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -834,9 +824,7 @@ def get_prefill_model_config(model_config_str, input_shape, num_devices): ) # attetnion_slice_size * 16 qheads // attention_num_cores // TILE_SIZE # Attention - model_config[ - "ATTENTION_MM_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ATTENTION_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=attention_mm_grid_size, in0_block_w=head_dim // 32, out_subblock_h=1, @@ -855,9 +843,7 @@ def get_prefill_model_config(model_config_str, input_shape, num_devices): block_h=attetnion_mm_M, block_w=row_height // 32, ) - model_config[ - "ATTENTION_MM_2_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ATTENTION_MM_2_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=attention_mm_grid_size, in0_block_w=row_height // 32, out_subblock_h=1, # use 4 for S=2k when hang is fixed diff --git a/models/demos/t3000/falcon40b/tt/model_utils.py b/models/demos/t3000/falcon40b/tt/model_utils.py index 590609f3595..daf833c4ac3 100644 --- a/models/demos/t3000/falcon40b/tt/model_utils.py +++ b/models/demos/t3000/falcon40b/tt/model_utils.py @@ -125,7 +125,7 @@ def matmul_1d_config( if overwrite_subblock_h is not None: out_subblock_h = overwrite_subblock_h - return ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + return ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(grid.x, grid.y), in0_block_w=per_core_k, out_subblock_h=out_subblock_h, @@ -214,7 
+214,7 @@ def matmul_2d_config( # f"per_core_m: {per_core_m}, per_core_k: {per_core_k}, per_core_n: {per_core_n}, out_subblock_h: {out_subblock_h}, out_subblock_w: {out_subblock_w}" # ) - return ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + return ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(grid.x, grid.y), in0_block_w=per_core_k, # how much inner dim you take each time out_subblock_h=out_subblock_h, # Must be divisible by per_core_M diff --git a/models/demos/t3000/llama2_70b/scripts/model_config_n150.py b/models/demos/t3000/llama2_70b/scripts/model_config_n150.py index 1bc49f7e65e..aa731a234a9 100644 --- a/models/demos/t3000/llama2_70b/scripts/model_config_n150.py +++ b/models/demos/t3000/llama2_70b/scripts/model_config_n150.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import tt_lib as ttl +import ttnn from loguru import logger from pathlib import Path @@ -405,7 +406,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): # ) model_config["FUSED_QKV_MM_OUTPUT_MEMCFG"] = WIDTH_SHARDED_MEMCFG if num_devices == 1: - model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -417,7 +418,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) elif num_devices == 4: - model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 2), in0_block_w=16, out_subblock_h=1, @@ -430,7 +431,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 8: # Unpadded version - model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=32, out_subblock_h=1, @@ -442,7 +443,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) # Padded version - # model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + # model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( # compute_with_storage_grid_size=(8, 8), # in0_block_w=4, # out_subblock_h=1, @@ -455,7 +456,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): # ) # Split QKV Matmul Config - model_config["WQ_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["WQ_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -467,7 +468,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) - model_config["WK_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["WK_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -585,7 +586,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): # ) if num_devices == 8: - model_config["ROT_MAT_K_MM_PROGCFG"] = 
ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ROT_MAT_K_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(1, 1), in0_block_w=4, out_subblock_h=1, @@ -598,7 +599,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) model_config["ROT_MAT_K_MM_OUTPUT_MEMCFG"] = HEIGHT_SHARDED_MEMCFG - model_config["ROT_MAT_Q_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ROT_MAT_Q_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=4, out_subblock_h=1, @@ -739,7 +740,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): model_config["SELFOUT_MM_OUTPUT_MEMCFG"] = WIDTH_SHARDED_MEMCFG if num_devices == 4: # (32 x 8k) x (8k x 2k) = (32 x 2k) - model_config["SELFOUT_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["SELFOUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, out_subblock_h=1, @@ -752,7 +753,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 8: # (32 x 8k) x (8k x 1k) = (32 x 1k) - model_config["SELFOUT_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["SELFOUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -888,7 +889,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): per_core_N = 14 if num_devices == 1 else 7 # N / TILE_WIDTH / Grid_Size is based on compute_with_storage_grid_size - model_config["FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=compute_with_storage_grid_size, in0_block_w=in0_block_w, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -900,7 +901,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) - model_config["FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=compute_with_storage_grid_size, in0_block_w=in0_block_w, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -913,7 +914,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) # FF2 Matmul Config Variables, using dim=-2 shard, All-gather/All-reduce(1D-Weight Stationary) schemes, if num_devices == 1: - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=14, out_subblock_h=1, @@ -926,7 +927,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 4: if all_gather: - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 
4), in0_block_w=28, # K = 28672 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -938,7 +939,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) else: # All Reduce Case - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=7, # K = 7168 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -951,7 +952,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 8: if all_gather: - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=28, # K = 28672 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -963,7 +964,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) else: - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 2), in0_block_w=7, # K = 3584 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -997,7 +998,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ), ) if num_devices == 4: - model_config["PADDED_FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -1008,7 +1009,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): fused_activation=ttl.tensor.FusibleActivation.SILU, mcast_in0=True, ) - model_config["PADDED_FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -1039,7 +1040,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): False, ), ) - model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=16, # K = 32768 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -1052,7 +1053,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 8: - model_config["PADDED_FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must 
be divisible by per_core_M @@ -1063,7 +1064,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): fused_activation=ttl.tensor.FusibleActivation.SILU, mcast_in0=True, ) - model_config["PADDED_FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -1094,7 +1095,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): False, ), ) - model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=32, # K = 32768 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -1156,7 +1157,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): # LM Head model_config["LM_HEAD_MM_OUTPUT_MEMCFG"] = WIDTH_SHARDED_MEMCFG - model_config["LM_HEAD_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, diff --git a/models/demos/t3000/llama2_70b/tests/perf/ivan_ff.py b/models/demos/t3000/llama2_70b/tests/perf/ivan_ff.py index b6976fda3f2..883202b3774 100644 --- a/models/demos/t3000/llama2_70b/tests/perf/ivan_ff.py +++ b/models/demos/t3000/llama2_70b/tests/perf/ivan_ff.py @@ -8,6 +8,7 @@ import tt_lib import tt_lib as ttl +import ttnn from models.utility_functions import torch2tt_tensor, tt2torch_tensor @@ -66,7 +67,7 @@ def run_test_ff1( max_dst_size = 4 if USE_ACC else 8 out_subblock_w = max([i for i in range(1, max_dst_size + 1) if (per_core_N % i) == 0]) - prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( # compute_with_storage_grid_size=(8,4), compute_with_storage_grid_size=compute_grid, in0_block_w=in0_block_w, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size diff --git a/models/demos/t3000/llama2_70b/tests/perf/test_bmm.py b/models/demos/t3000/llama2_70b/tests/perf/test_bmm.py index e526a7ca7c2..f1585ff005f 100644 --- a/models/demos/t3000/llama2_70b/tests/perf/test_bmm.py +++ b/models/demos/t3000/llama2_70b/tests/perf/test_bmm.py @@ -8,6 +8,7 @@ import tt_lib import tt_lib as ttl +import ttnn from models.demos.t3000.llama2_70b.reference.llama.llama import Llama from models.demos.t3000.llama2_70b.tt.model_config import ( get_model_config, @@ -156,7 +157,7 @@ def run_test( q_tt = torch2tt_tensor(q_in, device) # , tt_memory_config=inp_mem_config) k_tt = torch2tt_tensor(k_in, device, tt_memory_config=k_mem_config) - prog_config = tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig( + prog_config = ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=[8, 4], in0_block_w=HEAD_DIM // TILE_SIZE, out_subblock_h=1, # TODO: Maximize diff --git a/models/demos/t3000/llama2_70b/tests/perf/test_ff1.py b/models/demos/t3000/llama2_70b/tests/perf/test_ff1.py index dcf375f2fe0..89e4bef74ad 100644 --- a/models/demos/t3000/llama2_70b/tests/perf/test_ff1.py +++ 
b/models/demos/t3000/llama2_70b/tests/perf/test_ff1.py @@ -8,6 +8,7 @@ import tt_lib import tt_lib as ttl +import ttnn from models.demos.t3000.llama2_70b.reference.llama.llama import Llama from models.demos.t3000.llama2_70b.tt.model_config import ( get_model_config, @@ -85,7 +86,7 @@ def run_test_ff1( max_dst_size = 4 if USE_ACC else 8 out_subblock_w = max([i for i in range(1, max_dst_size + 1) if (per_core_N % i) == 0]) - prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=compute_with_storage_grid_size, in0_block_w=in0_block_w, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M diff --git a/models/demos/t3000/llama2_70b/tests/perf/test_llama_matmul_perf.py b/models/demos/t3000/llama2_70b/tests/perf/test_llama_matmul_perf.py index fae45d6c58f..4121f665a38 100644 --- a/models/demos/t3000/llama2_70b/tests/perf/test_llama_matmul_perf.py +++ b/models/demos/t3000/llama2_70b/tests/perf/test_llama_matmul_perf.py @@ -43,7 +43,7 @@ def __init__(self, device): tt_dtype=BFP8_DTYPE, ) - self.prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -77,7 +77,7 @@ def __init__(self, device): tt_dtype=BFP8_DTYPE, ) - self.prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=32, out_subblock_h=1, @@ -124,7 +124,7 @@ def __init__(self, device): tt_dtype=BFP8_DTYPE, ) - self.prog_config1 = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.prog_config1 = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # how much inner dim you take each time out_subblock_h=2, # Must be divisible by per_core_M @@ -136,7 +136,7 @@ def __init__(self, device): fuse_batch=True, ) - self.prog_config3 = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.prog_config3 = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # how much inner dim you take each time out_subblock_h=2, # Must be divisible by per_core_M @@ -148,7 +148,7 @@ def __init__(self, device): fuse_batch=True, ) - self.prog_config2 = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.prog_config2 = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=32, # how much inner dim you take each time out_subblock_h=4, # Must be divisible by per_core_M @@ -303,7 +303,7 @@ def __init__(self, device): ) in0_block_w = 4 - self.prog_config1 = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.prog_config1 = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( # [2048 x 8192] x [8192 x 4096] compute_with_storage_grid_size=(8, 8), in0_block_w=in0_block_w, # how much inner dim you take each time @@ -315,7 +315,7 @@ def __init__(self, device): transpose_mcast=False, ) - self.prog_config3 = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.prog_config3 = 
ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( # [2048 x 8192] x [8192 x 4096] compute_with_storage_grid_size=(8, 8), in0_block_w=in0_block_w, # how much inner dim you take each time @@ -327,7 +327,7 @@ def __init__(self, device): transpose_mcast=False, ) - self.prog_config2 = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.prog_config2 = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( # [2048 x 32768] x [32768 x 1024] compute_with_storage_grid_size=(8, 8), in0_block_w=in0_block_w, # how much inner dim you take each time diff --git a/models/demos/t3000/llama2_70b/tt/model_config.py b/models/demos/t3000/llama2_70b/tt/model_config.py index d3c8154fad3..20c04e552af 100644 --- a/models/demos/t3000/llama2_70b/tt/model_config.py +++ b/models/demos/t3000/llama2_70b/tt/model_config.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import tt_lib as ttl +import ttnn from loguru import logger import os from pathlib import Path @@ -371,7 +372,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) inplace=True, ) # LM Head - model_config["LM_HEAD_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -382,7 +383,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) fused_activation=None, mcast_in0=True, ) - model_config["LLAMA3_LM_HEAD_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["LLAMA3_LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -414,9 +415,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) block_w=num_tiles_per_core_w, inplace=True, ) - model_config[ - "LM_HEAD_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + model_config["LM_HEAD_MM_PROGCFG_LAMBDA"] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # how much inner dim you take each time out_subblock_h=1, # Must be divisible by per_core_M @@ -500,7 +499,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) # qkv_list shape is [8192,1280] per_core_M = seq_len // 32 // 4 in0_block_w = 32 if seq_len == 128 else 8 # smaller in0_block_w for larger seq_len to fit in L1 - model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=in0_block_w, # how much inner dim you take each time out_subblock_h=1, # Must be divisible by per_core_M @@ -511,7 +510,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) fused_activation=None, ) else: - model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=32, out_subblock_h=1, @@ -544,7 +543,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) ) # TODO: Remove once confirm we don't need this, using only fallback ops if llm_mode == "decode": 
- model_config["ROT_MAT_Q_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ROT_MAT_Q_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=4, out_subblock_h=1, @@ -558,7 +557,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) else: # ttnn.Shape([1, 8, 128, 128]) # ttnn.Shape([8, 128, 128, 128]) - model_config["ROT_MAT_Q_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseProgramConfig( + model_config["ROT_MAT_Q_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=[8, 1], in0_block_w=4, # 128 // TILE_SIZE (dynamic) out_subblock_h=1, @@ -568,7 +567,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) ) # ttnn.Shape([1, 1, 128, 128]) # ttnn.Shape([1, 128, 128, 128]) - model_config["ROT_MAT_K_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ROT_MAT_K_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(1, 1), in0_block_w=4, out_subblock_h=1, @@ -633,9 +632,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) ), ) if llm_mode == "decode": - model_config[ - "ATTN_BATCHED_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseProgramConfig( + model_config["ATTN_BATCHED_MM_PROGCFG_LAMBDA"] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=[8, 4], in0_block_w=head_dim // 32, # HEAD_DIM // TILE_SIZE out_subblock_h=1, # TODO: Maximize @@ -656,9 +653,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) False, ), ) - model_config[ - "SCORES_BATCHED_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseProgramConfig( + model_config["SCORES_BATCHED_MM_PROGCFG_LAMBDA"] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=[8, 4], in0_block_w=seq_tiles, # SEQ_LEN // TILE_SIZE (dynamic) out_subblock_h=1, # TODO: Maximize @@ -695,9 +690,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) else: # (Pdb) query_layer[0].shape: ttnn.Shape([1, 8, 128, 128]) # (Pdb) key_layer_transposed[0].shape: ttnn.Shape([1, 1, 128, 128]) - model_config[ - "ATTN_BATCHED_MM_PROGCFG" - ] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ATTN_BATCHED_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=head_dim // 32, out_subblock_h=1, @@ -708,9 +701,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) fused_activation=None, mcast_in0=False, ) - model_config[ - "SCORES_BATCHED_MM_PROGCFG" - ] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["SCORES_BATCHED_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=seq_len // 32, out_subblock_h=1, @@ -736,9 +727,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) ), ) - model_config[ - "ATTN_BATCHED_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseProgramConfig( + model_config["ATTN_BATCHED_MM_PROGCFG_LAMBDA"] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=[8, 1], in0_block_w=head_dim // 32, # 
HEAD_DIM // TILE_DIM out_subblock_h=1, # TODO: Maximize @@ -761,9 +750,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) ), ) - model_config[ - "SCORES_BATCHED_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseProgramConfig( + model_config["SCORES_BATCHED_MM_PROGCFG_LAMBDA"] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=[8, 1], in0_block_w=seq_tiles, # SEQ_LEN // TILE_DIM (dynamic) out_subblock_h=1, # TODO: Maximize @@ -841,7 +828,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) ), ) if llm_mode == "decode": - model_config["SELFOUT_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["SELFOUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # (32 x 8k) x (8k x 1k) = (32 x 1k) out_subblock_h=1, @@ -853,9 +840,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) mcast_in0=True, ) else: - model_config[ - "SELFOUT_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + model_config["SELFOUT_MM_PROGCFG_LAMBDA"] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # how much inner dim you take each time out_subblock_h=1, # Must be divisible by per_core_M @@ -893,7 +878,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) # Padded MLP 32K config: if num_devices == 8: if llm_mode == "decode": - model_config["PADDED_FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -904,7 +889,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) fused_activation=ttl.tensor.FusibleActivation.SILU, mcast_in0=True, ) - model_config["PADDED_FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -915,7 +900,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) fused_activation=None, mcast_in0=True, ) - model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=32, # K = 32768 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -930,7 +915,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) # Llama 2 MLP Module Prefill model_config[ "PADDED_FF1_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + ] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # how much inner dim you take each 
time out_subblock_h=1, # Must be divisible by per_core_M @@ -943,7 +928,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) model_config[ "PADDED_FF3_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + ] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # how much inner dim you take each time out_subblock_h=1, # Must be divisible by per_core_M @@ -958,7 +943,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) # input1: [1,1,32k,1k] model_config[ "PADDED_FF2_MM_PROGCFG_LAMBDA" - ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + ] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, # how much inner dim you take each time out_subblock_h=1, # Must be divisible by per_core_M @@ -969,7 +954,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) fused_activation=None, ) elif num_devices == 32: - model_config["PADDED_FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, # K = 8k / 4 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -980,7 +965,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) fused_activation=None, # Can't use SILU on partial activations mcast_in0=True, ) - model_config["PADDED_FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, # K = 8k / 4 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -991,7 +976,7 @@ def get_model_config(model_config_str="BFLOAT16-DRAM", num_devices=8, seq_len=1) fused_activation=None, mcast_in0=True, ) - model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=4, # K = 32k / 8 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, diff --git a/models/demos/t3000/mixtral8x7b/tt/model_config.py b/models/demos/t3000/mixtral8x7b/tt/model_config.py index 5b1f6596339..dd660e49636 100644 --- a/models/demos/t3000/mixtral8x7b/tt/model_config.py +++ b/models/demos/t3000/mixtral8x7b/tt/model_config.py @@ -167,9 +167,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): self.model_config["SHARDED_NORM_OUTPUT_MEMCFG"] = self.model_config["SHARDED_NORM_INPUT_MEMCFG"] # Create program configs for the different ttlib matmul ops - self.model_config[ - "ROT_MAT_MM_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.model_config["ROT_MAT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=4, out_subblock_h=1, @@ -190,9 +188,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): ) ) - self.model_config[ - 
"GATE_MM_OUTPUT_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.model_config["GATE_MM_OUTPUT_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=16, out_subblock_h=1, @@ -204,9 +200,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): mcast_in0=False, ) - self.model_config[ - "QKV_MM_OUTPUT_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.model_config["QKV_MM_OUTPUT_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=4, out_subblock_h=1, @@ -219,7 +213,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): ) self.model_config["SCORES_BATCHED_MM_PROGCFG"] = cached_lambda( - lambda p: ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + lambda p: ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=4, out_subblock_h=1, @@ -230,7 +224,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): ) self.model_config["VALUES_BATCHED_MM_PROGCFG"] = cached_lambda( - lambda p: ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + lambda p: ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=p, out_subblock_h=1, @@ -240,9 +234,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): ) ) - self.model_config[ - "LM_HEAD_OUTPUT_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.model_config["LM_HEAD_OUTPUT_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=1, out_subblock_h=1, @@ -254,9 +246,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): mcast_in0=True, ) - self.model_config[ - "FF1_OUTPUT_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.model_config["FF1_OUTPUT_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=2, # K = 4096 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -268,9 +258,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): mcast_in0=True, ) - self.model_config[ - "FF3_OUTPUT_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.model_config["FF3_OUTPUT_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=2, # K = 4096 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -282,9 +270,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): mcast_in0=True, ) - self.model_config[ - "FF2_OUTPUT_PROGCFG" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.model_config["FF2_OUTPUT_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=7, # K = 14336 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -297,9 +283,7 @@ def __init__(self, device=None, instruct=False, dummy_weights=False): mcast_in0=True, ) - self.model_config[ - "OUTPUT_MM_PROGCFG" 
- ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.model_config["OUTPUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(7, 6), # TODO Hanging with full coreGrid (8,8) in0_block_w=2, out_subblock_h=1, diff --git a/models/demos/wormhole/mistral7b/tt/mistral_attention.py b/models/demos/wormhole/mistral7b/tt/mistral_attention.py index 254a35f85ec..5ff7eda9207 100644 --- a/models/demos/wormhole/mistral7b/tt/mistral_attention.py +++ b/models/demos/wormhole/mistral7b/tt/mistral_attention.py @@ -151,7 +151,7 @@ def __init__( for i in range(self.num_devices) ] - self.q_heads_program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.q_heads_program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=ttnn.experimental.tensor.CoreCoord(self.grid_size.x, self.grid_size.y), in0_block_w=4, out_subblock_h=4, @@ -161,7 +161,7 @@ def __init__( transpose_mcast=False, fused_activation=None, ) - self.k_heads_program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.k_heads_program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=ttnn.experimental.tensor.CoreCoord(self.grid_size.x, self.grid_size.y), in0_block_w=4, out_subblock_h=1, @@ -205,7 +205,7 @@ def __init__( ) for i in range(len(devices)) ] - self.expand_program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.expand_program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=ttnn.experimental.tensor.CoreCoord(self.grid_size.x, self.grid_size.y), in0_block_w=4, out_subblock_h=2, @@ -216,7 +216,7 @@ def __init__( fused_activation=None, ) - self.reduce_program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.reduce_program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=ttnn.experimental.tensor.CoreCoord(self.grid_size.x, self.grid_size.y), in0_block_w=4, out_subblock_h=4, @@ -227,7 +227,7 @@ def __init__( fused_activation=None, ) - self.attn_program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + self.attn_program_config = ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=ttnn.experimental.tensor.CoreCoord(8, 4), in0_block_w=1, out_subblock_h=1, diff --git a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_cross_attention.py b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_cross_attention.py index 31f5ca0f91e..1ff72cccb30 100644 --- a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_cross_attention.py +++ b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_cross_attention.py @@ -246,9 +246,7 @@ def __init__(self, device, parameters, seq_len): in0_block_h, in0_block_w, out_subblock_h, out_subblock_w, out_block_h, out_block_w = determine_blocking( M, K, N, grid_size ) - self.program_configs[ - "qkv" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.program_configs["qkv"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=in0_block_w, out_subblock_h=out_subblock_h, @@ -265,7 +263,7 @@ def __init__(self, device, parameters, seq_len): in0_block_h, in0_block_w, out_subblock_h, out_subblock_w, out_block_h, out_block_w = 
determine_blocking( M, K, N, grid_size ) - self.program_configs["q"] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.program_configs["q"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=in0_block_w, out_subblock_h=out_subblock_h, @@ -286,9 +284,7 @@ def __init__(self, device, parameters, seq_len): in0_block_h, in0_block_w, out_subblock_h, out_subblock_w, out_block_h, out_block_w = determine_blocking( M, K, N, grid_size ) - self.program_configs[ - "kv" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + self.program_configs["kv"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=in0_block_w, out_subblock_h=out_subblock_h, @@ -316,9 +312,7 @@ def __init__(self, device, parameters, seq_len): if slow_mm: out_subblock_h = 1 out_subblock_w = 1 - self.program_configs[ - "tsa_qkt" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.program_configs["tsa_qkt"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=self.tsa_grid_size, in0_block_w=self.key_len // 32, per_core_M=tiles_per_shard, @@ -343,9 +337,7 @@ def __init__(self, device, parameters, seq_len): if slow_mm: out_subblock_h = 1 out_subblock_w = 1 - self.program_configs[ - "tsa_v" - ] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + self.program_configs["tsa_v"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=self.tsa_grid_size, in0_block_w=seq_len // 32, per_core_M=tiles_per_shard, @@ -479,7 +471,7 @@ def sharded_attention(self, query, key, value, head_size, attn_type): query.deallocate() else: q_sharded = query - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=inner // 32, out_subblock_h=1, @@ -573,7 +565,7 @@ def sharded_attention(self, query, key, value, head_size, attn_type): else: v_sharded = value - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=key_len // 32, out_subblock_h=1, @@ -625,7 +617,7 @@ def out(self, hidden_states): hidden_states, grid_size, ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED ) output_mem_config = self.height_sharded_memory_config - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=K // 32 if hs else 1, per_core_M=B * M // num_cores // 32 if hs else B * M // 32, @@ -644,7 +636,7 @@ def out(self, hidden_states): in0_block_h, in0_block_w, out_subblock_h, out_subblock_w, out_block_h, out_block_w = determine_blocking( M, K, N, grid_size ) - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=in0_block_w, out_subblock_h=out_subblock_h, diff --git a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_feedforward.py b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_feedforward.py index fd51cd276ad..59a5e05cd55 
100644 --- a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_feedforward.py +++ b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_feedforward.py @@ -71,7 +71,7 @@ def __call__(self, config, hidden_states): if size == 512: out_subblock_h = 1 out_subblock_w = 1 - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=in0_block_w, out_subblock_h=out_subblock_h, diff --git a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_geglu.py b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_geglu.py index 37a63bd7c8c..76c4e6454b9 100644 --- a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_geglu.py +++ b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_geglu.py @@ -103,7 +103,7 @@ def __call__(self, config, hidden_states): if size == 512: out_subblock_h = 1 out_subblock_w = 1 - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=in0_block_w, out_subblock_h=out_subblock_h, @@ -133,7 +133,7 @@ def __call__(self, config, hidden_states): if hidden_states.shape[-2] == 8192: proj = ttnn.reallocate(proj) - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=in0_block_w, out_subblock_h=out_subblock_h, diff --git a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_resnetblock2d.py b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_resnetblock2d.py index 6b5a3f79d42..c6f01367e07 100644 --- a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_resnetblock2d.py +++ b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_resnetblock2d.py @@ -507,7 +507,7 @@ def __call__( time_emb_proj_out_channels = out_channels * 2 else: raise ValueError(f"unknown time_embedding_norm : {time_embedding_norm} ") - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=temb.shape[-1] // grid_size[1] // 32, out_subblock_h=1, diff --git a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_resnetblock2d_new_conv.py b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_resnetblock2d_new_conv.py index b15faea0e27..71b49e8ade5 100644 --- a/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_resnetblock2d_new_conv.py +++ b/models/demos/wormhole/stable_diffusion/tt2/ttnn_functional_resnetblock2d_new_conv.py @@ -541,7 +541,7 @@ def __call__( time_emb_proj_out_channels = out_channels * 2 else: raise ValueError(f"unknown time_embedding_norm : {time_embedding_norm} ") - program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=grid_size, in0_block_w=temb.shape[-1] // grid_size[1] // 32, out_subblock_h=1, diff --git a/models/experimental/falcon_40b/tests/test_reproduce_nd_matmul.py b/models/experimental/falcon_40b/tests/test_reproduce_nd_matmul.py index 2e05f98da66..6d24c8372d7 100644 --- 
a/models/experimental/falcon_40b/tests/test_reproduce_nd_matmul.py +++ b/models/experimental/falcon_40b/tests/test_reproduce_nd_matmul.py @@ -6,6 +6,7 @@ import pytest import tt_lib as ttl +import ttnn from models.utility_functions import comp_pcc, tt2torch_tensor, torch2tt_tensor import torch @@ -52,7 +53,7 @@ def test_reproduce_matmul_1d( a_t = torch2tt_tensor(A, device, ttl.tensor.Layout.TILE, in0_mem_config, in0_dtype) b_t = torch2tt_tensor(B, device, ttl.tensor.Layout.TILE, in1_mem_config, in1_dtype) - program_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=in_block_w, out_subblock_h=out_subblock_h, @@ -154,7 +155,7 @@ def test_reproduce_matmul_2d( a_t = torch2tt_tensor(A, device, ttl.tensor.Layout.TILE, in0_mem_config, in0_dtype) b_t = torch2tt_tensor(B, device, ttl.tensor.Layout.TILE, in1_mem_config, in1_dtype) - program_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=in_block_w, out_subblock_h=out_subblock_h, diff --git a/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit.py b/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit.py index fdaa6470642..05eaafe4c21 100644 --- a/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit.py +++ b/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit.py @@ -39,7 +39,7 @@ def update_model_config(config, batch_size): False, ), ), - "embedding_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "embedding_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=3, out_subblock_h=1, @@ -49,7 +49,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=None, ), - "query_key_value_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "query_key_value_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, @@ -59,7 +59,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=None, ), - "query_by_key_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + "query_by_key_matmul_program_config": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, @@ -67,7 +67,7 @@ def update_model_config(config, batch_size): per_core_M=7, per_core_N=7, ), - "attention_probabilities_by_value_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + "attention_probabilities_by_value_matmul_program_config": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=7, out_subblock_h=1, @@ -75,7 +75,7 @@ def update_model_config(config, batch_size): per_core_M=7, per_core_N=2, ), - "self_output_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "self_output_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, 
out_subblock_h=7, @@ -85,7 +85,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=None, ), - "ff1_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "ff1_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, @@ -95,7 +95,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=(ttnn.experimental.tensor.FusibleActivation.GELU, True), ), - "ff2_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "ff2_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=8, out_subblock_h=7, @@ -105,7 +105,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=None, ), - "classifer_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "classifer_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, diff --git a/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit_backup.py b/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit_backup.py index 557aae803bd..aaeb459ce7b 100644 --- a/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit_backup.py +++ b/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit_backup.py @@ -39,7 +39,7 @@ def update_model_config(config, batch_size): False, ), ), - "embedding_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "embedding_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=3, out_subblock_h=1, @@ -49,7 +49,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=None, ), - "query_key_value_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "query_key_value_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, @@ -59,7 +59,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=None, ), - "query_by_key_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + "query_by_key_matmul_program_config": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, @@ -67,7 +67,7 @@ def update_model_config(config, batch_size): per_core_M=7, per_core_N=7, ), - "attention_probabilities_by_value_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig( + "attention_probabilities_by_value_matmul_program_config": ttnn.MatmulMultiCoreReuseProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=7, out_subblock_h=1, @@ -75,7 +75,7 @@ def update_model_config(config, batch_size): per_core_M=7, per_core_N=2, ), - "self_output_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "self_output_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( 
compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=7, @@ -85,7 +85,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=None, ), - "ff1_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "ff1_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, @@ -95,7 +95,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=(ttnn.experimental.tensor.FusibleActivation.GELU, True), ), - "ff2_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "ff2_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=8, out_subblock_h=7, @@ -105,7 +105,7 @@ def update_model_config(config, batch_size): transpose_mcast=False, fused_activation=None, ), - "classifer_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + "classifer_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(core_grid.x, core_grid.y), in0_block_w=2, out_subblock_h=1, diff --git a/models/experimental/llama2_70b/scripts/model_config_n150.py b/models/experimental/llama2_70b/scripts/model_config_n150.py index 1bc49f7e65e..aa731a234a9 100644 --- a/models/experimental/llama2_70b/scripts/model_config_n150.py +++ b/models/experimental/llama2_70b/scripts/model_config_n150.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import tt_lib as ttl +import ttnn from loguru import logger from pathlib import Path @@ -405,7 +406,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): # ) model_config["FUSED_QKV_MM_OUTPUT_MEMCFG"] = WIDTH_SHARDED_MEMCFG if num_devices == 1: - model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -417,7 +418,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) elif num_devices == 4: - model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 2), in0_block_w=16, out_subblock_h=1, @@ -430,7 +431,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 8: # Unpadded version - model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=32, out_subblock_h=1, @@ -442,7 +443,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) # Padded version - # model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + # model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( # compute_with_storage_grid_size=(8, 8), # in0_block_w=4, # out_subblock_h=1, @@ -455,7 +456,7 @@ def get_model_config(model_config_str, num_devices=1, 
all_gather=True): # ) # Split QKV Matmul Config - model_config["WQ_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["WQ_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -467,7 +468,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) - model_config["WK_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["WK_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -585,7 +586,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): # ) if num_devices == 8: - model_config["ROT_MAT_K_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ROT_MAT_K_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(1, 1), in0_block_w=4, out_subblock_h=1, @@ -598,7 +599,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) model_config["ROT_MAT_K_MM_OUTPUT_MEMCFG"] = HEIGHT_SHARDED_MEMCFG - model_config["ROT_MAT_Q_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["ROT_MAT_Q_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 1), in0_block_w=4, out_subblock_h=1, @@ -739,7 +740,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): model_config["SELFOUT_MM_OUTPUT_MEMCFG"] = WIDTH_SHARDED_MEMCFG if num_devices == 4: # (32 x 8k) x (8k x 2k) = (32 x 2k) - model_config["SELFOUT_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["SELFOUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, out_subblock_h=1, @@ -752,7 +753,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 8: # (32 x 8k) x (8k x 1k) = (32 x 1k) - model_config["SELFOUT_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["SELFOUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, @@ -888,7 +889,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): per_core_N = 14 if num_devices == 1 else 7 # N / TILE_WIDTH / Grid_Size is based on compute_with_storage_grid_size - model_config["FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=compute_with_storage_grid_size, in0_block_w=in0_block_w, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -900,7 +901,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) - model_config["FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=compute_with_storage_grid_size, in0_block_w=in0_block_w, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ 
-913,7 +914,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) # FF2 Matmul Config Variables, using dim=-2 shard, All-gather/All-reduce(1D-Weight Stationary) schemes, if num_devices == 1: - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=14, out_subblock_h=1, @@ -926,7 +927,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 4: if all_gather: - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=28, # K = 28672 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -938,7 +939,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) else: # All Reduce Case - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=7, # K = 7168 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -951,7 +952,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 8: if all_gather: - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=28, # K = 28672 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -963,7 +964,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): mcast_in0=True, ) else: - model_config["FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 2), in0_block_w=7, # K = 3584 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -997,7 +998,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ), ) if num_devices == 4: - model_config["PADDED_FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -1008,7 +1009,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): fused_activation=ttl.tensor.FusibleActivation.SILU, mcast_in0=True, ) - model_config["PADDED_FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -1039,7 +1040,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): False, ), ) - 
model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=16, # K = 32768 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -1052,7 +1053,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): ) elif num_devices == 8: - model_config["PADDED_FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -1063,7 +1064,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): fused_activation=ttl.tensor.FusibleActivation.SILU, mcast_in0=True, ) - model_config["PADDED_FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 8), in0_block_w=4, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, # Must be divisible by per_core_M @@ -1094,7 +1095,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): False, ), ) - model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=32, # K = 32768 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size out_subblock_h=1, @@ -1156,7 +1157,7 @@ def get_model_config(model_config_str, num_devices=1, all_gather=True): # LM Head model_config["LM_HEAD_MM_OUTPUT_MEMCFG"] = WIDTH_SHARDED_MEMCFG - model_config["LM_HEAD_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + model_config["LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=8, out_subblock_h=1, diff --git a/models/experimental/llama2_70b/tests/perf/ivan_ff.py b/models/experimental/llama2_70b/tests/perf/ivan_ff.py index b6976fda3f2..883202b3774 100644 --- a/models/experimental/llama2_70b/tests/perf/ivan_ff.py +++ b/models/experimental/llama2_70b/tests/perf/ivan_ff.py @@ -8,6 +8,7 @@ import tt_lib import tt_lib as ttl +import ttnn from models.utility_functions import torch2tt_tensor, tt2torch_tensor @@ -66,7 +67,7 @@ def run_test_ff1( max_dst_size = 4 if USE_ACC else 8 out_subblock_w = max([i for i in range(1, max_dst_size + 1) if (per_core_N % i) == 0]) - prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( # compute_with_storage_grid_size=(8,4), compute_with_storage_grid_size=compute_grid, in0_block_w=in0_block_w, # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size diff --git a/models/experimental/llama2_70b/tests/perf/test_bmm.py b/models/experimental/llama2_70b/tests/perf/test_bmm.py index 9ca3f210f4d..aa273649864 100644 --- a/models/experimental/llama2_70b/tests/perf/test_bmm.py +++ b/models/experimental/llama2_70b/tests/perf/test_bmm.py @@ -8,6 +8,7 @@ import tt_lib 
diff --git a/models/experimental/llama2_70b/tests/perf/ivan_ff.py b/models/experimental/llama2_70b/tests/perf/ivan_ff.py
index b6976fda3f2..883202b3774 100644
--- a/models/experimental/llama2_70b/tests/perf/ivan_ff.py
+++ b/models/experimental/llama2_70b/tests/perf/ivan_ff.py
@@ -8,6 +8,7 @@
 import tt_lib
 import tt_lib as ttl
+import ttnn
 from models.utility_functions import torch2tt_tensor, tt2torch_tensor
@@ -66,7 +67,7 @@ def run_test_ff1(
     max_dst_size = 4 if USE_ACC else 8
     out_subblock_w = max([i for i in range(1, max_dst_size + 1) if (per_core_N % i) == 0])
-    prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+    prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
         # compute_with_storage_grid_size=(8,4),
         compute_with_storage_grid_size=compute_grid,
         in0_block_w=in0_block_w,  # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size
diff --git a/models/experimental/llama2_70b/tests/perf/test_bmm.py b/models/experimental/llama2_70b/tests/perf/test_bmm.py
index 9ca3f210f4d..aa273649864 100644
--- a/models/experimental/llama2_70b/tests/perf/test_bmm.py
+++ b/models/experimental/llama2_70b/tests/perf/test_bmm.py
@@ -8,6 +8,7 @@
 import tt_lib
 import tt_lib as ttl
+import ttnn
 from models.experimental.llama2_70b.reference.llama.llama import Llama
 from models.experimental.llama2_70b.tt.model_config import (
     get_model_config,
@@ -156,7 +157,7 @@ def run_test(
     q_tt = torch2tt_tensor(q_in, device)  # , tt_memory_config=inp_mem_config)
     k_tt = torch2tt_tensor(k_in, device, tt_memory_config=k_mem_config)
-    prog_config = tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig(
+    prog_config = ttnn.MatmulMultiCoreReuseProgramConfig(
         compute_with_storage_grid_size=[8, 4],
         in0_block_w=HEAD_DIM // TILE_SIZE,
         out_subblock_h=1,  # TODO: Maximize
diff --git a/models/experimental/llama2_70b/tests/perf/test_ff1.py b/models/experimental/llama2_70b/tests/perf/test_ff1.py
index 6d3f3a936d9..fd6b99b4521 100644
--- a/models/experimental/llama2_70b/tests/perf/test_ff1.py
+++ b/models/experimental/llama2_70b/tests/perf/test_ff1.py
@@ -8,6 +8,7 @@
 import tt_lib
 import tt_lib as ttl
+import ttnn
 from models.experimental.llama2_70b.reference.llama.llama import Llama
 from models.experimental.llama2_70b.tt.model_config import (
     get_model_config,
@@ -85,7 +86,7 @@ def run_test_ff1(
     max_dst_size = 4 if USE_ACC else 8
     out_subblock_w = max([i for i in range(1, max_dst_size + 1) if (per_core_N % i) == 0])
-    prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+    prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
         compute_with_storage_grid_size=compute_with_storage_grid_size,
         in0_block_w=in0_block_w,  # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size
         out_subblock_h=1,  # Must be divisible by per_core_M
diff --git a/models/experimental/llama2_70b/tests/perf/test_llama_matmul_perf.py b/models/experimental/llama2_70b/tests/perf/test_llama_matmul_perf.py
index c8f6334685c..371a4a80728 100644
--- a/models/experimental/llama2_70b/tests/perf/test_llama_matmul_perf.py
+++ b/models/experimental/llama2_70b/tests/perf/test_llama_matmul_perf.py
@@ -8,6 +8,7 @@
 import tt_lib
 import tt_lib as ttl
+import ttnn
 from models.utility_functions import torch2tt_tensor, tt2torch_tensor
@@ -42,7 +43,7 @@ def __init__(self, device):
             tt_dtype=BFP8_DTYPE,
         )
-        self.prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        self.prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=8,  # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size
            out_subblock_h=1,  # Must be divisible by per_core_M
@@ -76,7 +77,7 @@ def __init__(self, device):
             tt_dtype=BFP8_DTYPE,
         )
-        self.prog_config = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        self.prog_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=32,
            out_subblock_h=1,
@@ -123,7 +124,7 @@ def __init__(self, device):
             tt_dtype=BFP8_DTYPE,
         )
-        self.prog_config1 = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        self.prog_config1 = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=8,  # how much inner dim you take each time
            out_subblock_h=2,  # Must be divisible by per_core_M
@@ -135,7 +136,7 @@ def __init__(self, device):
             fuse_batch=True,
         )
-        self.prog_config3 = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        self.prog_config3 = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=8,  # how much inner dim you take each time
            out_subblock_h=2,  # Must be divisible by per_core_M
@@ -147,7 +148,7 @@ def __init__(self, device):
             fuse_batch=True,
         )
-        self.prog_config2 = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        self.prog_config2 = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=32,  # how much inner dim you take each time
            out_subblock_h=4,  # Must be divisible by per_core_M
@@ -302,7 +303,7 @@ def __init__(self, device):
         )
         in0_block_w = 4
-        self.prog_config1 = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+        self.prog_config1 = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
            # [2048 x 8192] x [8192 x 4096]
            compute_with_storage_grid_size=(8, 8),
            in0_block_w=in0_block_w,  # how much inner dim you take each time
@@ -314,7 +315,7 @@ def __init__(self, device):
             transpose_mcast=False,
         )
-        self.prog_config3 = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+        self.prog_config3 = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
            # [2048 x 8192] x [8192 x 4096]
            compute_with_storage_grid_size=(8, 8),
            in0_block_w=in0_block_w,  # how much inner dim you take each time
@@ -326,7 +327,7 @@ def __init__(self, device):
             transpose_mcast=False,
         )
-        self.prog_config2 = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+        self.prog_config2 = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
            # [2048 x 32768] x [32768 x 1024]
            compute_with_storage_grid_size=(8, 8),
            in0_block_w=in0_block_w,  # how much inner dim you take each time
diff --git a/models/experimental/llama2_70b/tests/unit_tests/test_rotary_matmul.py b/models/experimental/llama2_70b/tests/unit_tests/test_rotary_matmul.py
index 4e656ceabda..7ccb793d138 100644
--- a/models/experimental/llama2_70b/tests/unit_tests/test_rotary_matmul.py
+++ b/models/experimental/llama2_70b/tests/unit_tests/test_rotary_matmul.py
@@ -76,7 +76,7 @@ def run_test_rotary_matmul1(
     )
     L1_MEMCFG = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1)
-    ROT_MAT_Q_MM_PROGCFG = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+    ROT_MAT_Q_MM_PROGCFG = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
         compute_with_storage_grid_size=(8, 1),
         in0_block_w=4,
         out_subblock_h=1,
@@ -189,7 +189,7 @@ def run_test_rotary_matmul2(
     )
     L1_MEMCFG = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1)
-    ROT_MAT_MM_PROGCFG = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+    ROT_MAT_MM_PROGCFG = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
         compute_with_storage_grid_size=(8, 4),
         in0_block_w=4,
         out_subblock_h=1,
@@ -303,7 +303,7 @@ def run_test_rotary_matmul3(
     )
     L1_MEMCFG = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1)
-    ROT_MAT_MM_PROGCFG = ttl.operations.primary.MatmulMultiCoreReuseProgramConfig(
+    ROT_MAT_MM_PROGCFG = ttnn.MatmulMultiCoreReuseProgramConfig(
         compute_with_storage_grid_size=[8, 4],
         in0_block_w=4,  # 128 // TILE_SIZE (dynamic)
         out_subblock_h=1,
@@ -425,7 +425,7 @@ def run_test_rotary_matmul4(
             False,
         ),
     )
-    ROT_MAT_MM_PROGCFG = ttl.operations.primary.MatmulMultiCoreReuseProgramConfig(
+    ROT_MAT_MM_PROGCFG = ttnn.MatmulMultiCoreReuseProgramConfig(
         compute_with_storage_grid_size=[8, 4],
         in0_block_w=4,  # 128 // TILE_SIZE (dynamic)
         out_subblock_h=1,
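The test files above all change in the same two ways: an `import ttnn` is added next to the existing tt_lib/ttl imports, and only the constructor prefix changes; the program-config classes keep their names and keyword arguments. A short sketch of the batched (no-multicast) variant used for the Q x K^T matmul in test_bmm.py follows; HEAD_DIM, TILE_SIZE, and the subblock/per-core values are illustrative assumptions, not taken from this patch.

import ttnn

TILE_SIZE = 32
HEAD_DIM = 128  # assumed, consistent with the "128 // TILE_SIZE" comments above

# Batched matmul program config (reuse variant, no multicast),
# mirroring the prog_config constructed in test_bmm.py above.
bmm_prog_config = ttnn.MatmulMultiCoreReuseProgramConfig(
    compute_with_storage_grid_size=[8, 4],
    in0_block_w=HEAD_DIM // TILE_SIZE,
    out_subblock_h=1,
    out_subblock_w=1,  # assumed
    per_core_M=1,      # assumed
    per_core_N=4,      # assumed
)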
diff --git a/models/experimental/llama2_70b/tt/model_config.py b/models/experimental/llama2_70b/tt/model_config.py
index a6b958b0d83..f3828e3c754 100644
--- a/models/experimental/llama2_70b/tt/model_config.py
+++ b/models/experimental/llama2_70b/tt/model_config.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 import tt_lib as ttl
+import ttnn
 from loguru import logger
 import os
 from pathlib import Path
@@ -283,7 +284,7 @@ def get_model_config(
         inplace=True,
     )
     # LM Head
-    model_config["LM_HEAD_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+    model_config["LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
         compute_with_storage_grid_size=(8, 4),
         in0_block_w=8,
         out_subblock_h=1,
@@ -294,7 +295,7 @@ def get_model_config(
         fused_activation=None,
         mcast_in0=True,
     )
-    model_config["LLAMA3_LM_HEAD_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+    model_config["LLAMA3_LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
         compute_with_storage_grid_size=(8, 4),
         in0_block_w=8,
         out_subblock_h=1,
@@ -326,7 +327,7 @@ def get_model_config(
         block_w=num_tiles_per_core_w,
         inplace=True,
     )
-    model_config["LM_HEAD_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+    model_config["LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
         compute_with_storage_grid_size=(8, 4),
         in0_block_w=8,  # how much inner dim you take each time
         out_subblock_h=1,  # Must be divisible by per_core_M
@@ -339,7 +340,7 @@ def get_model_config(
     cores_y = 4 if seq_len == 128 else 8
     max_mm_seq_tiles = min(seq_len, model_config["MAX_MM_SEQ_LEN"]) // 32
-    model_config["LLAMA3_LM_HEAD_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+    model_config["LLAMA3_LM_HEAD_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
         compute_with_storage_grid_size=(8, cores_y),
         in0_block_w=1,  # how much inner dim you take each time
         out_subblock_h=1,  # Must be divisible by per_core_M
@@ -421,7 +422,7 @@ def get_model_config(
         cores_y = 4  # 8 if seq_len_tiles % 8 == 0 else 4
         max_mm_seq_tiles = min(seq_len, model_config["MAX_MM_SEQ_LEN"]) // 32
         in0_block_w = 32 if seq_len == 128 else 8  # smaller in0_block_w for larger seq_len to fit in L1)
-        model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+        model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
            compute_with_storage_grid_size=(8, cores_y),
            in0_block_w=in0_block_w,  # how much inner dim you take each time
            out_subblock_h=1,  # Must be divisible by per_core_M
@@ -433,7 +434,7 @@ def get_model_config(
            fuse_batch=False,
        )
    else:
-        model_config["FUSED_QKV_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        model_config["FUSED_QKV_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 5),
            in0_block_w=8,
            out_subblock_h=1,
@@ -460,7 +461,7 @@ def get_model_config(
     )
     if llm_mode == "decode":
-        model_config["ROT_MAT_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseProgramConfig(
+        model_config["ROT_MAT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseProgramConfig(
            compute_with_storage_grid_size=batch_size_coregrid,
            in0_block_w=4,  # 128 // TILE_SIZE (dynamic)
            out_subblock_h=1,
@@ -496,7 +497,7 @@ def get_model_config(
     )
     model_config[
         "ATTN_BATCHED_MM_PROGCFG_LAMBDA"
-    ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseProgramConfig(
+    ] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseProgramConfig(
         compute_with_storage_grid_size=[8, 4],
         in0_block_w=head_dim // 32,  # HEAD_DIM // TILE_SIZE
         out_subblock_h=1,  # TODO: Maximize
@@ -534,7 +535,7 @@ def get_model_config(
     model_config[
         "SCORES_BATCHED_MM_PROGCFG_LAMBDA"
-    ] = lambda seq_tiles: ttl.operations.primary.MatmulMultiCoreReuseProgramConfig(
+    ] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseProgramConfig(
         compute_with_storage_grid_size=[8, 4],
         in0_block_w=seq_tiles,  # SEQ_LEN // TILE_SIZE (dynamic)
         out_subblock_h=1,  # TODO: Maximize
@@ -609,7 +610,7 @@ def get_model_config(
         ),
     )
     if llm_mode == "decode":
-        model_config["SELFOUT_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        model_config["SELFOUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=8,  # (32 x 8k) x (8k x 1k) = (32 x 1k)
            out_subblock_h=1,
@@ -624,7 +625,7 @@ def get_model_config(
         cores_y = 4  # 8 if seq_len_tiles % 8 == 0 else 4
         max_mm_seq_tiles = min(seq_len, model_config["MAX_MM_SEQ_LEN"]) // 32
         in0_block_w = 32 if seq_len == 128 else 8  # smaller in0_block_w for larger seq_len to fit in L1)
-        model_config["SELFOUT_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+        model_config["SELFOUT_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
            compute_with_storage_grid_size=(8, cores_y),
            in0_block_w=8,  # how much inner dim you take each time
            out_subblock_h=1,  # Must be divisible by per_core_M
@@ -638,7 +639,7 @@ def get_model_config(
     # Llama MLP config
     # Padded MLP 32K config:
     if llm_mode == "decode":
-        model_config["PADDED_FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        model_config["PADDED_FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=8,  # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size
            out_subblock_h=1,  # Must be divisible by per_core_M
@@ -649,7 +650,7 @@ def get_model_config(
            fused_activation=ttl.tensor.FusibleActivation.SILU,
            mcast_in0=True,
        )
-        model_config["PADDED_FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=8,  # K = 8192 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size
            out_subblock_h=1,  # Must be divisible by per_core_M
@@ -660,7 +661,7 @@ def get_model_config(
            fused_activation=None,
            mcast_in0=True,
        )
-        model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+        model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
            compute_with_storage_grid_size=(8, 4),
            in0_block_w=32,  # K = 32768 / TILE_WIDTH=32 / Grid_Size is based on compute_with_storage_grid_size
            out_subblock_h=1,
@@ -675,7 +676,7 @@ def get_model_config(
         # Llama MLP Module Prefill
         cores_y = 4  # 8 if seq_tiles % 8 == 0 else 4
         max_mm_seq_tiles = min(seq_len, model_config["MAX_MM_SEQ_LEN"]) // 32
-        model_config["PADDED_FF1_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+        model_config["PADDED_FF1_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
            compute_with_storage_grid_size=(8, cores_y),
            in0_block_w=4,  # how much inner dim you take each time
            out_subblock_h=1,  # Must be divisible by per_core_M
@@ -687,7 +688,7 @@ def get_model_config(
            fuse_batch=False,
        )
-        model_config["PADDED_FF3_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+        model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
            compute_with_storage_grid_size=(8, cores_y),
            in0_block_w=4,  # how much inner dim you take each time
            out_subblock_h=1,  # Must be divisible by per_core_M
@@ -701,7 +702,7 @@ def get_model_config(
         # input0: [1,32,128,32k]
         # input1: [1,1,32k,1k]
-        model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+        model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
            compute_with_storage_grid_size=(8, cores_y),
            in0_block_w=4,  # how much inner dim you take each time
            out_subblock_h=1,  # Must be divisible by per_core_M
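Several entries in model_config.py are stored as lambdas so a fresh program config can be built for the current sequence length at run time. A sketch of that pattern with the new ttnn constructor follows; the dictionary key and the seq_tiles-driven in0_block_w come from the SCORES_BATCHED_MM_PROGCFG_LAMBDA hunk above, while the remaining values are illustrative assumptions.

import ttnn

model_config = {}

# in0_block_w tracks the number of sequence tiles; everything else stays fixed.
model_config["SCORES_BATCHED_MM_PROGCFG_LAMBDA"] = lambda seq_tiles: ttnn.MatmulMultiCoreReuseProgramConfig(
    compute_with_storage_grid_size=[8, 4],
    in0_block_w=seq_tiles,  # SEQ_LEN // TILE_SIZE (dynamic)
    out_subblock_h=1,
    out_subblock_w=1,  # assumed
    per_core_M=1,      # assumed
    per_core_N=4,      # assumed
)

# Rebuild the config for a 2048-token sequence: 2048 // 32 = 64 sequence tiles.
scores_prog_config = model_config["SCORES_BATCHED_MM_PROGCFG_LAMBDA"](2048 // 32)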
model_config["PADDED_FF3_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, cores_y), in0_block_w=4, # how much inner dim you take each time out_subblock_h=1, # Must be divisible by per_core_M @@ -701,7 +702,7 @@ def get_model_config( # input0: [1,32,128,32k] # input1: [1,1,32k,1k] - model_config["PADDED_FF2_MM_PROGCFG"] = ttl.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig( + model_config["PADDED_FF2_MM_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig( compute_with_storage_grid_size=(8, cores_y), in0_block_w=4, # how much inner dim you take each time out_subblock_h=1, # Must be divisible by per_core_M diff --git a/models/experimental/resnet/tt/ttnn_functional_resnet50.py b/models/experimental/resnet/tt/ttnn_functional_resnet50.py index 0b432192457..f0cac68db3a 100644 --- a/models/experimental/resnet/tt/ttnn_functional_resnet50.py +++ b/models/experimental/resnet/tt/ttnn_functional_resnet50.py @@ -11,7 +11,7 @@ ) hardcoded_matmul_config_linear = { - 8: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 8: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1, @@ -22,7 +22,7 @@ fused_activation=None, mcast_in0=True, ), - 16: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 16: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1, @@ -33,7 +33,7 @@ fused_activation=None, mcast_in0=True, ), - 20: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 20: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1, diff --git a/models/experimental/resnet/tt/ttnn_functional_resnet50_new_conv_api.py b/models/experimental/resnet/tt/ttnn_functional_resnet50_new_conv_api.py index f593d21bb16..9a3232ee2e5 100644 --- a/models/experimental/resnet/tt/ttnn_functional_resnet50_new_conv_api.py +++ b/models/experimental/resnet/tt/ttnn_functional_resnet50_new_conv_api.py @@ -12,7 +12,7 @@ from typing import List hardcoded_matmul_config_linear = { - 8: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 8: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1, @@ -23,7 +23,7 @@ fused_activation=None, mcast_in0=True, ), - 16: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 16: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1, @@ -34,7 +34,7 @@ fused_activation=None, mcast_in0=True, ), - 20: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig( + 20: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig( compute_with_storage_grid_size=(8, 4), in0_block_w=2, out_subblock_h=1,