#9492: Update parameter names for linear in test_experimental.py (#9627)
* #9492: Update parameter names for linear in test_experimental.py

* #9492: Add import ttnn when converting matmul program configs to ttnn
bbradelTT authored Jun 24, 2024
1 parent 187a933 commit 334e963
Showing 36 changed files with 314 additions and 356 deletions.
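Most of the 36 files follow the same mechanical pattern: matmul program configs move from the ttnn.experimental.operations.primary and tt_lib.operations.primary namespaces to top-level ttnn. A minimal before/after sketch of the pattern, with illustrative parameter values rather than values taken from any one file below:

    import ttnn

    # Old spelling, removed throughout this diff:
    #   ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(...)
    # New spelling, added throughout this diff:
    program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
        compute_with_storage_grid_size=(8, 8),  # illustrative grid, not from the diff
        in0_block_w=4,
        out_subblock_h=1,
        out_subblock_w=6,
        per_core_M=12,
        per_core_N=12,
        transpose_mcast=False,
        fused_activation=None,
    )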
12 changes: 6 additions & 6 deletions models/demos/bert/tt/ttnn_optimized_sharded_bert.py
@@ -20,7 +20,7 @@ def update_model_config(config, batch_size):
core_grid = ttnn.CoreGrid(y=8, x=batch_size)

program_configs = {
"query_key_value_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"query_key_value_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(core_grid.x, core_grid.y),
in0_block_w=4,
out_subblock_h=1,
@@ -30,23 +30,23 @@ def update_model_config(config, batch_size):
transpose_mcast=True,
fused_activation=None,
),
"query_by_key_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig(
"query_by_key_matmul_program_config": ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=(core_grid.x, core_grid.y),
in0_block_w=2,
out_subblock_h=1,
out_subblock_w=6,
per_core_M=24,
per_core_N=12,
),
"attention_probabilities_by_value_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig(
"attention_probabilities_by_value_matmul_program_config": ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=(core_grid.x, core_grid.y),
in0_block_w=12,
out_subblock_h=4,
out_subblock_w=2,
per_core_M=24,
per_core_N=2,
),
"self_output_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"self_output_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(core_grid.x, core_grid.y),
in0_block_w=4,
out_subblock_h=2,
@@ -56,7 +56,7 @@ def update_model_config(config, batch_size):
transpose_mcast=True,
fused_activation=None,
),
"ff1_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"ff1_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(core_grid.x, core_grid.y),
in0_block_w=4,
out_subblock_h=1,
@@ -66,7 +66,7 @@ def update_model_config(config, batch_size):
transpose_mcast=True,
fused_activation=(ttnn.experimental.tensor.FusibleActivation.GELU, True),
),
"ff2_matmul_program_config": ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"ff2_matmul_program_config": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(core_grid.x, core_grid.y),
in0_block_w=16,
out_subblock_h=2,
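The commit title refers to updated parameter names for linear in test_experimental.py, which is not among the files shown here. As a hedged sketch of how configs like the ones above are consumed, assuming two tile-layout tensors already on device (activations and weights are placeholder names, not from this commit):

    # Hypothetical usage; tensor creation and device setup omitted.
    # ttnn.linear takes the config through its program_config keyword.
    output = ttnn.linear(
        activations,  # e.g. a [batch, 1, 384, 1024] device tensor (assumed shape)
        weights,      # e.g. a [1, 1, 1024, 1024] device tensor (assumed shape)
        program_config=program_configs["self_output_matmul_program_config"],
        memory_config=ttnn.L1_MEMORY_CONFIG,
        dtype=ttnn.bfloat8_b,
    )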
@@ -344,7 +344,7 @@ def test_falcon7b_attnention_sliced(
subblock_w = 1
if seq_len == 2048:
subblock_w = 8 # best option
-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=2,
per_core_M=tiles_per_shard,
@@ -415,7 +415,7 @@ def test_falcon7b_attnention_sliced(

subblock_w = 2
subblock_h = 1
-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=seq_len // 32,
per_core_M=tiles_per_shard,
@@ -641,7 +641,7 @@ def test_falcon7b_attention_softmax_sequence(
subblock_w = 1
if seq_len == 2048:
subblock_w = 8 # best option
-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=2,
per_core_M=tiles_per_shard,
@@ -686,7 +686,7 @@ def test_falcon7b_attention_softmax_sequence(

subblock_w = 2
subblock_h = 1
-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=seq_len // 32,
per_core_M=tiles_per_shard,
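The attention hunks above show only the leading arguments of MatmulMultiCoreReuseMultiCast1DProgramConfig before the diff context is cut off. For completeness, a sketch of a full construction, where the trailing keyword arguments (fuse_batch, fused_activation, mcast_in0) are assumptions about the config's usual signature rather than values visible in this diff:

    program_config = ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
        compute_with_storage_grid_size=(8, 8),  # the tests pass grid_size
        in0_block_w=2,
        out_subblock_w=8,       # subblock_w in the tests: 8 when seq_len == 2048, else 1
        out_subblock_h=1,
        per_core_M=8,           # tiles_per_shard in the tests (illustrative value)
        per_core_N=64,          # e.g. seq_len // 32 with seq_len == 2048
        fuse_batch=True,        # assumed keyword
        fused_activation=None,  # assumed keyword
        mcast_in0=False,        # assumed keyword
    )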
2 changes: 1 addition & 1 deletion models/demos/falcon7b/tt/falcon_lm_head.py
@@ -61,7 +61,7 @@ def falcon_lm_head_matmul_2d(
per_core_N = nearest_y(weights_n_in_tiles / grid.x, out_subblock_w)
in0_block_w = 4 if seq_len <= 1024 else 8

-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=grid,
in0_block_w=in0_block_w,
out_subblock_h=out_subblock_h,
14 changes: 6 additions & 8 deletions models/demos/falcon7b/tt/model_config.py
@@ -267,7 +267,7 @@ def get_model_config(model_config_str, prefill_seq_len=0, decode_batch_size=32):

model_config[
"ATTN_BATCHED_MM_PROGCFG"
-] = lambda block_w, per_core_M, per_core_N: ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig(
+] = lambda block_w, per_core_M, per_core_N: ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=[8, 4],
in0_block_w=block_w,
out_subblock_h=1, # TODO: Maximize
@@ -326,7 +326,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg):
)
model_config["MLP_KERNEL_CONFIG"] = default_kernel_config

-mm_h_to_4h_prog_cfg = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+mm_h_to_4h_prog_cfg = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=model_config["MLP_GRID_SIZE"],
in0_block_w=3,
out_subblock_h=1,
@@ -338,7 +338,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg):
)
model_config["DENSE_H_TO_4H_MM_PROGCFG"] = mm_h_to_4h_prog_cfg

-mm_4h_to_h_prog_cfg = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+mm_4h_to_h_prog_cfg = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=model_config["MLP_GRID_SIZE"],
in0_block_w=8,
out_subblock_h=1,
@@ -352,9 +352,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg):
model_config["MLP_INTERLEAVED_TO_SHARDED_MEM_CFG"] = dram_memcfg

model_config["FUSED_QKV_MM_OPTIMIZED_MEMCFG"] = dram_memcfg
-model_config[
-"FUSED_QKV_MM_OPTIMIZED_PROGCFG"
-] = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+model_config["FUSED_QKV_MM_OPTIMIZED_PROGCFG"] = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(8, 8),
in0_block_w=2,
per_core_M=8,
@@ -380,7 +378,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg):

model_config[
"QKT_OPTIMIZED_PROGCFG"
-] = lambda tiles_per_shard, seq_len, subblock_h, subblock_w: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+] = lambda tiles_per_shard, seq_len, subblock_h, subblock_w: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
compute_with_storage_grid_size=model_config["ATTN_OPTIMIZED_GRID_SIZE"],
in0_block_w=2,
per_core_M=tiles_per_shard,
@@ -414,7 +412,7 @@ def set_prefill_config(model_config, seq_len, dram_memcfg):

model_config[
"QKTV_MM_OPTIMIZED_PROGCFG"
-] = lambda tiles_per_shard, seq_len, subblock_h: ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCast1DProgramConfig(
+] = lambda tiles_per_shard, seq_len, subblock_h: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
compute_with_storage_grid_size=model_config["ATTN_OPTIMIZED_GRID_SIZE"],
in0_block_w=seq_len // 32,
per_core_M=tiles_per_shard,
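Several entries in falcon7b's model_config.py, like ATTN_BATCHED_MM_PROGCFG and QKT_OPTIMIZED_PROGCFG above, are stored as lambdas so that shape-dependent fields can be filled in per call. A short sketch of how such an entry is resolved at run time (argument values are illustrative):

    # The dictionary value is a lambda, so callers build a fresh config
    # once the runtime dimensions are known.
    prog_cfg = model_config["ATTN_BATCHED_MM_PROGCFG"](
        4,  # block_w
        2,  # per_core_M
        2,  # per_core_N
    )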
12 changes: 6 additions & 6 deletions models/demos/metal_BERT_large_11/tt/custom_matmuls.py
@@ -14,7 +14,7 @@ def bert_large_fused_qkv_matmul(
assert input_tensor_a.get_legacy_shape() == [batch_size, 1, 384, 1024], "Unsupported input shape"
assert input_tensor_b.get_legacy_shape() == [1, 1, 1024, 3072], "Unsupported input shape"

-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(12, batch_size),
in0_block_w=4,
out_subblock_h=4,
@@ -59,7 +59,7 @@ def bert_large_ff1_matmul(
assert input_tensor_a.get_legacy_shape() == [batch_size, 1, 384, 1024], "Unsupported input shape"
assert input_tensor_b.get_legacy_shape() == [1, 1, 1024, 4096], "Unsupported input shape"

-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(12, batch_size),
in0_block_w=4,
out_subblock_h=6,
@@ -87,7 +87,7 @@ def bert_large_ff2_matmul(
assert input_tensor_a.get_legacy_shape() == [batch_size, 1, 384, 4096], "Unsupported input shape"
assert input_tensor_b.get_legacy_shape() == [1, 1, 4096, 1024], "Unsupported input shape"

-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(12, batch_size),
in0_block_w=4,
out_subblock_h=6,
@@ -115,7 +115,7 @@ def bert_large_selfout_matmul(
assert input_tensor_a.get_legacy_shape() == [batch_size, 1, 384, 1024], "Unsupported input shape"
assert input_tensor_b.get_legacy_shape() == [1, 1, 1024, 1024], "Unsupported input shape"

-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=(12, batch_size),
in0_block_w=4,
out_subblock_h=6,
@@ -143,7 +143,7 @@ def bert_large_pre_softmax_bmm(
assert input_tensor_a.get_legacy_shape() == [batch_size, 16, 384, 64], "Unsupported input shape"
assert input_tensor_b.get_legacy_shape() == [batch_size, 16, 64, 384], "Unsupported input shape"

-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=(12, batch_size),
in0_block_w=1,
out_subblock_h=4,
@@ -168,7 +168,7 @@ def bert_large_post_softmax_bmm(
assert input_tensor_a.get_legacy_shape() == [batch_size, 16, 384, 384], "Unsupported input shape"
assert input_tensor_b.get_legacy_shape() == [batch_size, 16, 384, 64], "Unsupported input shape"

-program_config = ttnn.experimental.operations.primary.MatmulMultiCoreReuseProgramConfig(
+program_config = ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=(12, batch_size),
in0_block_w=2,
out_subblock_h=4,
23 changes: 12 additions & 11 deletions models/demos/metal_BERT_large_11/tt/model_config.py
@@ -3,6 +3,7 @@
# SPDX-License-Identifier: Apache-2.0

import tt_lib
+import ttnn
from loguru import logger
from pathlib import Path
from models.utility_functions import is_wormhole_b0
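This one-line hunk is the second bullet of the commit message: metal_BERT_large_11's model_config.py previously reached the program configs only through tt_lib, so rewriting them as ttnn.* below would raise a NameError without the new import. A minimal illustration of the resulting dual dependency (parameter values illustrative):

    import tt_lib  # still used below, e.g. tt_lib.tensor.FusibleActivation.GELU
    import ttnn    # newly required: the program config classes now live here

    cfg = ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
        compute_with_storage_grid_size=[12, 8],  # illustrative grid
        in0_block_w=4,
        out_subblock_h=1,
        out_subblock_w=6,
        per_core_M=12,
        per_core_N=12,
        transpose_mcast=False,
        # mixing both modules, as the configs in this file do:
        fused_activation=(tt_lib.tensor.FusibleActivation.GELU, True),
    )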
@@ -197,23 +198,23 @@ def get_model_config(batch, device_grid_size, model_config_str):
elif model_config_str == "BFLOAT8_B-L1" or model_config_str == "BFLOAT8_B-DRAM":
grid_size = [12, batch]
new_config_values = {
"OP3_PRE_SOFTMAX_BMM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig(
"OP3_PRE_SOFTMAX_BMM_CONFIG": ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=2,
out_subblock_h=1,
out_subblock_w=6,
per_core_M=12,
per_core_N=12,
),
"OP5_POST_SOFTMAX_BMM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig(
"OP5_POST_SOFTMAX_BMM_CONFIG": ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=12,
out_subblock_h=4,
out_subblock_w=2,
per_core_M=12,
per_core_N=2,
),
"OP7_SELFOUT_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"OP7_SELFOUT_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=4,
out_subblock_h=2,
Expand All @@ -223,7 +224,7 @@ def get_model_config(batch, device_grid_size, model_config_str):
transpose_mcast=False,
fused_activation=None,
),
"OP9_FF1_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"OP9_FF1_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=4,
out_subblock_h=1,
Expand All @@ -233,7 +234,7 @@ def get_model_config(batch, device_grid_size, model_config_str):
transpose_mcast=False,
fused_activation=(tt_lib.tensor.FusibleActivation.GELU, True),
),
"OP10_FF2_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"OP10_FF2_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=16,
out_subblock_h=2,
@@ -326,7 +327,7 @@ def get_model_config(batch, device_grid_size, model_config_str):
"OP11_LAYERNORM_GAMMA_MEMCFG": DRAM_MEMCFG,
"OP11_LAYERNORM_BETA_MEMCFG": DRAM_MEMCFG,
"RESERVE_SPLIT_HEADS_SHAPE": [1, 1, 1, 153 * 1024 // 2],
"OP1_FUSED_QKV_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"OP1_FUSED_QKV_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=4,
out_subblock_h=1,
@@ -336,23 +337,23 @@ def get_model_config(batch, device_grid_size, model_config_str):
transpose_mcast=transpose_mm_mcast,
fused_activation=None,
),
"OP3_PRE_SOFTMAX_BMM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig(
"OP3_PRE_SOFTMAX_BMM_CONFIG": ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=2,
out_subblock_h=1,
out_subblock_w=6,
per_core_M=24,
per_core_N=12,
),
"OP5_POST_SOFTMAX_BMM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseProgramConfig(
"OP5_POST_SOFTMAX_BMM_CONFIG": ttnn.MatmulMultiCoreReuseProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=12,
out_subblock_h=4,
out_subblock_w=2,
per_core_M=24,
per_core_N=2,
),
"OP7_SELFOUT_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"OP7_SELFOUT_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=4,
out_subblock_h=2,
Expand All @@ -362,7 +363,7 @@ def get_model_config(batch, device_grid_size, model_config_str):
transpose_mcast=transpose_mm_mcast,
fused_activation=None,
),
"OP9_FF1_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"OP9_FF1_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=4,
out_subblock_h=1,
Expand All @@ -372,7 +373,7 @@ def get_model_config(batch, device_grid_size, model_config_str):
transpose_mcast=transpose_mm_mcast,
fused_activation=(tt_lib.tensor.FusibleActivation.GELU, True),
),
"OP10_FF2_MM_CONFIG": tt_lib.operations.primary.MatmulMultiCoreReuseMultiCastProgramConfig(
"OP10_FF2_MM_CONFIG": ttnn.MatmulMultiCoreReuseMultiCastProgramConfig(
compute_with_storage_grid_size=grid_size,
in0_block_w=16,
out_subblock_h=2,
