#9747: Implement ttnn.tilize(_with_val_padding) Python bindings
yan-zaretskiy committed Jul 17, 2024
1 parent f86c0c1 commit 0ecbb36
Showing 39 changed files with 367 additions and 182 deletions.
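
Every hunk below applies the same mechanical migration: calls to the legacy tt_lib.tensor / ttnn.experimental.tensor tilize ops are switched to the new ttnn.tilize and ttnn.tilize_with_val_padding bindings, and the keyword arguments output_mem_config / output_dtype become memory_config / dtype. A minimal sketch of the new call pattern, assuming a Tenstorrent device is attached; the shapes and memory config are placeholders, not values taken from the changed files:

import torch
import ttnn

# Open a device and create a small row-major tensor whose last two dims are
# already tile-aligned (32 x 64), so plain tilize is valid without padding.
device = ttnn.open_device(device_id=0)
torch_x = torch.randn(1, 1, 32, 64)
x = ttnn.from_torch(torch_x, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT, device=device)

# Old spelling (removed by this commit):
#   x = ttnn.experimental.tensor.tilize(x, output_mem_config=..., output_dtype=...)
# New binding: memory_config / dtype keywords, optional use_multicore flag.
x = ttnn.tilize(x, memory_config=ttnn.DRAM_MEMORY_CONFIG, dtype=ttnn.bfloat16, use_multicore=True)

ttnn.close_device(device)

The padding variant takes the padded output shape and a fill value positionally, as the ResNet hunks below show.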
18 changes: 9 additions & 9 deletions models/demos/falcon7b/tt/falcon_model.py
@@ -138,10 +138,10 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
             # Tilize attn masks
             for tt_attention_mask_slice in attn_masks_unordered:
                 for i in range(self.num_devices):
-                    tt_attention_mask_slice[i] = ttnn.experimental.tensor.tilize(
+                    tt_attention_mask_slice[i] = ttnn.tilize(
                         tt_attention_mask_slice[i],
-                        output_mem_config=self.model_config["ATTN_MASK_MEMCFG"],
-                        output_dtype=self.model_config["ATTN_MASK_OPTIMIZED_PREFILL_DTYPE"],
+                        memory_config=self.model_config["ATTN_MASK_MEMCFG"],
+                        dtype=self.model_config["ATTN_MASK_OPTIMIZED_PREFILL_DTYPE"],
                     )
             # Expected output attention_masks
             # [dev0: [slice0, slice1, ...], dev1: [slice0, slice1, ...], ...]

@@ -166,10 +166,10 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
             )
             # Tilize attn masks
             for i in range(self.num_devices):
-                tt_attention_mask[i] = ttnn.experimental.tensor.tilize(
+                tt_attention_mask[i] = ttnn.tilize(
                     tt_attention_mask[i],
-                    output_mem_config=self.model_config["ATTN_MASK_MEMCFG"],
-                    output_dtype=self.model_config["ATTN_MASK_DTYPE"],
+                    memory_config=self.model_config["ATTN_MASK_MEMCFG"],
+                    dtype=self.model_config["ATTN_MASK_DTYPE"],
                 )

             tt_input_ids = []

@@ -220,10 +220,10 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
             if not self.model_config["l1_sharded"]:
                 # Tilize attn masks
                 for i in range(self.num_devices):
-                    tt_attention_mask[i] = ttnn.experimental.tensor.tilize(
+                    tt_attention_mask[i] = ttnn.tilize(
                         tt_attention_mask[i],
-                        output_mem_config=self.model_config["ATTN_MASK_MEMCFG"],
-                        output_dtype=self.model_config["ATTN_MASK_DTYPE"],
+                        memory_config=self.model_config["ATTN_MASK_MEMCFG"],
+                        dtype=self.model_config["ATTN_MASK_DTYPE"],
                     )

             for i, device in enumerate(self.devices):
32 changes: 16 additions & 16 deletions models/demos/resnet/tt/metalResnetBlock50.py
@@ -159,7 +159,7 @@ def format_tensor(x, target_layout, device, output_mem_config, pad_value=0.0):
                 x, device, x_padded_shape, pad_value, target_layout, output_mem_config
             )
         else:
-            return tt_lib.tensor.tilize(x, output_mem_config, use_multicore=True)
+            return ttnn.tilize(x, memory_config=output_mem_config, use_multicore=True)
     elif x.get_layout() == tt_lib.tensor.Layout.TILE and target_layout == tt_lib.tensor.Layout.ROW_MAJOR:
         if x.get_legacy_shape() != x.shape_without_padding():
             return tt_lib.tensor.format_output_tensor(

@@ -2187,10 +2187,10 @@ def forward(self, x: tt_lib.tensor, write_event=None, op_event=None, final_out_m
                 self.maxpool_output_shape[0] * self.maxpool_output_shape[1] * self.maxpool_output_shape[2],
                 self.maxpool_output_shape[3],
             )
-            x = tt_lib.tensor.tilize(
+            x = ttnn.tilize(
                 x,
-                output_mem_config=self.height_sharded_memory_config,
-                output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+                memory_config=self.height_sharded_memory_config,
+                dtype=self.model_config["ACTIVATIONS_DTYPE"],
                 use_multicore=True,
             )
         if self.batch_size == 20:

@@ -2314,12 +2314,12 @@ def forward(self, x: tt_lib.tensor, write_event=None, op_event=None, final_out_m
                 _nearest_32(unpadded_shape[3]),
             ]
             if self.sharded:
-                x = tt_lib.tensor.tilize_with_val_padding(
+                x = ttnn.tilize_with_val_padding(
                     x,
                     padded_shape,
                     0,
-                    output_mem_config=self.width_sharded_memory_config,
-                    output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+                    memory_config=self.width_sharded_memory_config,
+                    dtype=self.model_config["ACTIVATIONS_DTYPE"],
                 )
             else:
                 x = ttnn.pad(

@@ -2330,10 +2330,10 @@ def forward(self, x: tt_lib.tensor, write_event=None, op_event=None, final_out_m
                     memory_config=self.memory_config,
                     use_multicore=True,
                 )
-                x = tt_lib.tensor.tilize(
+                x = ttnn.tilize(
                     x,
-                    output_mem_config=self.memory_config,
-                    output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+                    memory_config=self.memory_config,
+                    dtype=self.model_config["ACTIVATIONS_DTYPE"],
                     use_multicore=True,
                 )

@@ -2365,12 +2365,12 @@ def forward(self, x: tt_lib.tensor, write_event=None, op_event=None, final_out_m
                 _nearest_32(unpadded_shape[3]),
             ]
             if self.sharded:
-                x = tt_lib.tensor.tilize_with_val_padding(
+                x = ttnn.tilize_with_val_padding(
                     x,
                     padded_shape,
                     0,
-                    output_mem_config=self.width_sharded_memory_config,
-                    output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+                    memory_config=self.width_sharded_memory_config,
+                    dtype=self.model_config["ACTIVATIONS_DTYPE"],
                 )
             else:
                 x = ttnn.pad(

@@ -2381,10 +2381,10 @@ def forward(self, x: tt_lib.tensor, write_event=None, op_event=None, final_out_m
                     memory_config=self.memory_config,
                     use_multicore=True,
                 )
-                x = tt_lib.tensor.tilize(
+                x = ttnn.tilize(
                     x,
-                    output_mem_config=self.memory_config,
-                    output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+                    memory_config=self.memory_config,
+                    dtype=self.model_config["ACTIVATIONS_DTYPE"],
                     use_multicore=True,
                 )
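
The ResNet hunks above pair the new ttnn.tilize_with_val_padding binding with a helper that rounds the last two dimensions up to the 32-element tile edge. A rough, self-contained sketch of that pattern, with _nearest_32 re-declared here for illustration and placeholder shapes and memory config instead of the model's:

import torch
import ttnn


def _nearest_32(n: int) -> int:
    # Round up to the next multiple of 32, the tile edge length.
    return ((n + 31) // 32) * 32


device = ttnn.open_device(device_id=0)

# A deliberately non-tile-aligned activation: 50 x 70 is not a multiple of 32.
torch_x = torch.randn(1, 1, 50, 70)
x = ttnn.from_torch(torch_x, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT, device=device)

unpadded_shape = torch_x.shape
padded_shape = [
    unpadded_shape[0],
    unpadded_shape[1],
    _nearest_32(unpadded_shape[2]),
    _nearest_32(unpadded_shape[3]),
]

# Pad with zeros up to the tile-aligned shape and convert to tile layout in one op.
x = ttnn.tilize_with_val_padding(
    x,
    padded_shape,
    0,
    memory_config=ttnn.L1_MEMORY_CONFIG,
    dtype=ttnn.bfloat16,
)

ttnn.close_device(device)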
18 changes: 11 additions & 7 deletions models/demos/t3000/falcon40b/tt/falcon_model.py
@@ -48,7 +48,11 @@ def __init__(
         self.num_layers = num_layers
         self.hidden_size = config.hidden_size
         self.num_devices = device_mesh.get_num_devices()
-        self.ln_output_tensors_dict = {"final_layernorm": dict(), "mlp_layernorm": dict(), "attn_layernorm": dict()}
+        self.ln_output_tensors_dict = {
+            "final_layernorm": dict(),
+            "mlp_layernorm": dict(),
+            "attn_layernorm": dict(),
+        }

         # Word Embeddings
         self.embeddings = TtFalconEmbeddings(

@@ -138,10 +142,10 @@ def create_attn_mask(self, max_seq_len):
             preprocess=lambda x: (x * -1e5),
         )

-        tt_attn_mask = ttnn.experimental.tensor.tilize(
+        tt_attn_mask = ttnn.tilize(
             tt_attn_mask,
-            output_mem_config=attention_mask_memconfig,
-            output_dtype=self.model_config["ATTN_MASK_DTYPE"],
+            memory_config=attention_mask_memconfig,
+            dtype=self.model_config["ATTN_MASK_DTYPE"],
         )
         return tt_attn_mask

@@ -235,10 +239,10 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
                 preprocess=lambda x: (x.transpose(0, 2) * -1e5).expand(-1, self.config.num_attention_heads, -1, -1),
             )

-            tt_attention_mask = ttnn.experimental.tensor.tilize(
+            tt_attention_mask = ttnn.tilize(
                 tt_attention_mask,
-                output_mem_config=attention_mask_memconfig,
-                output_dtype=self.model_config["ATTN_MASK_DTYPE"],
+                memory_config=attention_mask_memconfig,
+                dtype=self.model_config["ATTN_MASK_DTYPE"],
             )

         else:
12 changes: 6 additions & 6 deletions models/demos/ttnn_resnet/tt/ttnn_functional_resnet50.py
@@ -727,12 +727,12 @@ def __call__(self, input_tensor) -> ttnn.Tensor:
             _nearest_32(unpadded_shape[2]),
             _nearest_32(unpadded_shape[3]),
         ]
-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.avgpool(x, memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG)

@@ -759,12 +759,12 @@ def __call__(self, input_tensor) -> ttnn.Tensor:
             _nearest_32(unpadded_shape[3]),
         ]

-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.fc(x)
Additional changed file (path not shown):
@@ -755,12 +755,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
             _nearest_32(unpadded_shape[2]),
             _nearest_32(unpadded_shape[3]),
         ]
-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.avgpool(x, memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG)

@@ -787,12 +787,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
             _nearest_32(unpadded_shape[3]),
         ]

-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.fc(x)

@@ -975,12 +975,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
             _nearest_32(unpadded_shape[2]),
             _nearest_32(unpadded_shape[3]),
         ]
-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.avgpool(x, memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG)

@@ -1007,12 +1007,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
             _nearest_32(unpadded_shape[3]),
         ]

-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
        )

         x = self.fc(x)
Additional changed file (path not shown):
@@ -1099,12 +1099,12 @@ def run(self, input_tensor, device, ops_parallel_config, conv_op_cache={}) -> tt
             _nearest_32(unpadded_shape[2]),
             _nearest_32(unpadded_shape[3]),
         ]
-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.avgpool(x, memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG)

@@ -1131,12 +1131,12 @@ def run(self, input_tensor, device, ops_parallel_config, conv_op_cache={}) -> tt
             _nearest_32(unpadded_shape[3]),
         ]

-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.fc(x)
Additional changed file (path not shown):
@@ -749,12 +749,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
             _nearest_32(unpadded_shape[2]),
             _nearest_32(unpadded_shape[3]),
         ]
-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.avgpool(x, memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG)

@@ -781,12 +781,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
             _nearest_32(unpadded_shape[3]),
         ]

-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.fc(x)

@@ -952,12 +952,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
             _nearest_32(unpadded_shape[2]),
             _nearest_32(unpadded_shape[3]),
         ]
-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.avgpool(x, memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG)

@@ -984,12 +984,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
             _nearest_32(unpadded_shape[3]),
         ]

-        x = ttnn.experimental.tensor.tilize_with_val_padding(
+        x = ttnn.tilize_with_val_padding(
             x,
             padded_shape,
             0,
-            output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
-            output_dtype=self.model_config["ACTIVATIONS_DTYPE"],
+            memory_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG,
+            dtype=self.model_config["ACTIVATIONS_DTYPE"],
         )

         x = self.fc(x)
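
As a quick way to exercise the new binding end to end, a hypothetical smoke test (not part of this commit; it assumes a device is attached and that ttnn.from_torch / ttnn.to_torch round-trip as documented) might look like:

import torch
import ttnn

device = ttnn.open_device(device_id=0)

# Tile-aligned reference tensor so no padding is involved.
ref = torch.randn(1, 1, 64, 96).to(torch.bfloat16)
x = ttnn.from_torch(ref, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT, device=device)

tiled = ttnn.tilize(x, memory_config=ttnn.L1_MEMORY_CONFIG, dtype=ttnn.bfloat16)

# Tilize only reorders data into 32x32 tiles, so converting back should
# reproduce the original values exactly.
assert torch.equal(ttnn.to_torch(tiled), ref)

ttnn.close_device(device)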
