#4625: Enable multicore support for untilize with unpadding on interleaved tensors

This commit adds the ability to run the untilize_with_unpadding op on interleaved tensors across multiple cores.
yan-zaretskiy committed May 17, 2024
1 parent 87a78d6 commit 657a54f
Showing 20 changed files with 809 additions and 532 deletions.
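
Beyond the multicore path itself, the diff removes the output_tensor_start parameter from untilize_with_unpadding at every call site: unpadding now always begins at the origin, so callers pass only the inclusive end coordinates. A minimal before/after sketch of the call-site change (x, its shape, and mem_config are assumed for illustration; this is not code from the commit):

    import tt_lib

    # Assumed: x is a tilized, padded 4D tensor already on device, and
    # mem_config is a tt_lib.tensor.MemoryConfig.
    unpadded_shape = x.shape_without_padding()
    end = tuple(unpadded_shape[i] - 1 for i in range(4))  # inclusive end coords

    # Before this commit, an explicit start coordinate was required:
    # y = tt_lib.tensor.untilize_with_unpadding(x, (0, 0, 0, 0), end, mem_config)

    # After this commit, only the inclusive end coordinates are passed:
    y = tt_lib.tensor.untilize_with_unpadding(x, end, mem_config)
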
models/demos/resnet/tt/metalResnetBlock50.py (4 changes: 1 addition & 3 deletions)
@@ -2217,7 +2217,6 @@ def forward(self, x: tt_lib.tensor) -> tt_lib.tensor:
unpadded_shape = x.shape_without_padding()
x = tt_lib.tensor.untilize_with_unpadding(
x,
- (0, 0, 0, 0),
(unpadded_shape[0] - 1, unpadded_shape[1] - 1, unpadded_shape[2] - 1, unpadded_shape[3] - 1),
self.memory_config,
)
@@ -2274,7 +2273,7 @@ def forward(self, x: tt_lib.tensor) -> tt_lib.tensor:
]
if self.sharded:
x = tt_lib.tensor.untilize_with_unpadding(
- x, (0, 0, 0, 0), unpadded_shape_end, output_mem_config=self.width_sharded_memory_config
+ x, unpadded_shape_end, output_mem_config=self.width_sharded_memory_config
)
else:
x = tt_lib.tensor.untilize(x, self.memory_config, use_multicore=True)
@@ -2313,7 +2312,6 @@ def forward(self, x: tt_lib.tensor) -> tt_lib.tensor:
desired_shape[-1] = 1000
x = tt_lib.tensor.untilize_with_unpadding(
x,
- [0, 0, 0, 0],
(desired_shape[0] - 1, desired_shape[1] - 1, desired_shape[2] - 1, desired_shape[3] - 1),
self.memory_config,
)
models/experimental/resnet/tt/ttnn_functional_resnet50.py (4 changes: 1 addition & 3 deletions)
@@ -676,7 +676,6 @@ def __call__(self, input_tensor) -> ttnn.Tensor:
unpadded_shape = x.shape_without_padding()
x = ttnn.experimental.tensor.untilize_with_unpadding(
x,
- (0, 0, 0, 0),
(unpadded_shape[0] - 1, unpadded_shape[1] - 1, unpadded_shape[2] - 1, unpadded_shape[3] - 1),
ttnn.L1_MEMORY_CONFIG,
)
@@ -735,7 +734,7 @@ def __call__(self, input_tensor) -> ttnn.Tensor:
x.get_legacy_shape()[3] - 1,
]
x = ttnn.experimental.tensor.untilize_with_unpadding(
- x, (0, 0, 0, 0), unpadded_shape_end, output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG
+ x, unpadded_shape_end, output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG
)

x = ttnn.reshape(
@@ -763,7 +762,6 @@ def __call__(self, input_tensor) -> ttnn.Tensor:
desired_shape[-1] = 1000
x = ttnn.experimental.tensor.untilize_with_unpadding(
x,
- [0, 0, 0, 0],
(desired_shape[0] - 1, desired_shape[1] - 1, desired_shape[2] - 1, desired_shape[3] - 1),
ttnn.L1_MEMORY_CONFIG,
)
@@ -25,7 +25,6 @@ def run_untilize_with_unpadding_tests(
in_mem_config,
out_mem_config,
data_seed,
- output_tensor_start,
output_tensor_end,
device,
):
@@ -38,12 +37,11 @@
# compute ref value
x_ref = x.detach().clone()
ref_value = pytorch_ops.untilize_with_unpadding(
- x_ref, output_tensor_start=output_tensor_start, output_tensor_end=output_tensor_end
+ x_ref, output_tensor_end=output_tensor_end
)

tt_result = tt_untilize_with_unpadding(
x=x,
- output_tensor_start=output_tensor_start,
output_tensor_end=output_tensor_end,
device=device,
dtype=[dtype],
@@ -68,14 +66,13 @@
"SYSTEM_MEMORY",
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
5263366,
- [0, 0, 0, 0],
[10, 9, 4, 1],
),
]


@pytest.mark.parametrize(
- "input_shape, dtype, dlayout, in_mem_config, out_mem_config, data_seed, output_tensor_start, output_tensor_end",
+ "input_shape, dtype, dlayout, in_mem_config, out_mem_config, data_seed, output_tensor_end",
(test_sweep_args),
)
def test_untilize_with_unpadding_test(
@@ -85,7 +82,6 @@ def test_untilize_with_unpadding_test(
in_mem_config,
out_mem_config,
data_seed,
- output_tensor_start,
output_tensor_end,
device,
):
@@ -97,7 +93,6 @@ def test_untilize_with_unpadding_test(
in_mem_config,
out_mem_config,
data_seed,
- output_tensor_start,
output_tensor_end,
device,
)
@@ -823,14 +823,13 @@ def gen_untilize_with_unpadding_args(
input_shapes, dtypes, layouts, mem_configs, do_sanitize_args=do_sanitize_args
):
if input_info is not None:
- output_tensor_start = [0, 0, 0, 0]
- output_tensor_end = [random.randrange(output_tensor_start[i], input_shapes[0][i], 1) for i in range(4)]
+ output_tensor_end = [random.randrange(0, input_shapes[0][i], 1) for i in range(4)]
if output_tensor_end[-1] % 2 == 0:
output_tensor_end[-1] += 1
input_info.update(
{
- "output_tensor_start": output_tensor_start,
"output_tensor_end": output_tensor_end,
+ "use_multicore": True,
}
)
yield input_info
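
The sweep-argument generator above now draws only end coordinates and marks each case as multicore. A self-contained sketch of that generation logic (the input shape is an assumed example):

    import random

    input_shape = [1, 2, 64, 128]  # assumed NCHW example
    # Draw an inclusive end coordinate per dimension from [0, dim).
    output_tensor_end = [random.randrange(0, input_shape[i], 1) for i in range(4)]
    # As in the generator above, nudge the innermost end to an odd value
    # so the unpadded width (end + 1) is even.
    if output_tensor_end[-1] % 2 == 0:
        output_tensor_end[-1] += 1
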
@@ -8,7 +8,9 @@


from tests.tt_eager.python_api_testing.sweep_tests import comparison_funcs, generation_funcs
- from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import run_single_pytorch_test
+ from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import (
+     run_single_pytorch_test,
+ )
import tt_lib as ttl


@@ -38,7 +40,6 @@ def create_grid(x, y):
"output_mem_config": ttl.tensor.MemoryConfig(
ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM
),
- "output_tensor_start": [0, 0, 0, 0],
"output_tensor_end": [0, 0, 119, 7299],
},
)
tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py (10 changes: 5 additions & 5 deletions)
@@ -1200,13 +1200,13 @@ def tilize_with_val_padding(x, output_tensor_shape, pad_value, *args, **kwargs):
return tilized


- def untilize_with_unpadding(x, output_tensor_start, output_tensor_end, *args, **kwargs):
+ def untilize_with_unpadding(x, output_tensor_end, *args, **kwargs):
untilized = untilize_util(x)
unpad = untilized[
- output_tensor_start[0] : output_tensor_end[0] + 1,
- output_tensor_start[1] : output_tensor_end[1] + 1,
- output_tensor_start[2] : output_tensor_end[2] + 1,
- output_tensor_start[3] : output_tensor_end[3] + 1,
+ : output_tensor_end[0] + 1,
+ : output_tensor_end[1] + 1,
+ : output_tensor_end[2] + 1,
+ : output_tensor_end[3] + 1,
]
return unpad
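
With the start pinned at the origin, the reference unpadding reduces to plain slicing with inclusive end indices. A runnable illustration of just the slicing step (shape and end values are arbitrary; the untilize step is omitted):

    import torch

    x = torch.arange(2 * 2 * 4 * 8, dtype=torch.float32).reshape(2, 2, 4, 8)
    end = [1, 0, 2, 5]  # inclusive per-dimension end coordinates
    y = x[: end[0] + 1, : end[1] + 1, : end[2] + 1, : end[3] + 1]
    assert y.shape == (2, 1, 3, 6)  # each output size is end + 1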

tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py (5 changes: 1 addition & 4 deletions)
@@ -2063,7 +2063,6 @@ def untilize_with_unpadding(
layout,
input_mem_config,
output_mem_config,
- output_tensor_start,
output_tensor_end,
**kwargs,
):
@@ -2084,9 +2083,7 @@
input_mem_config[0],
)

- t1 = ttl.tensor.untilize_with_unpadding(
-     t0, output_tensor_start, output_tensor_end, output_mem_config=output_mem_config
- )
+ t1 = ttl.tensor.untilize_with_unpadding(t0, output_tensor_end, output_mem_config=output_mem_config)

return tt2torch_tensor(t1)

@@ -1655,7 +1655,6 @@ def test_block_sharded_untilize_with_unpadding(in_sharded, out_sharded, dtype, d

yt = ttl.tensor.untilize_with_unpadding(
xt,
- ttl.tensor.Shape([0, 0, 0, 0]),
ttl.tensor.Shape([0, 0, 391, 511]),
output_mem_config=out_mem_config,
)
@@ -1744,7 +1743,6 @@ def test_width_sharded_untilize_with_unpadding(

yt = ttl.tensor.untilize_with_unpadding(
xt,
- ttl.tensor.Shape([0, 0, 0, 0]),
ttl.tensor.Shape([N - 1, C - 1, output_H - 1, W - 1]),
output_mem_config=out_mem_config,
)
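
In these tests the Shape argument carries inclusive end coordinates, so the unpadded output extent is end + 1 in each dimension. A quick check of that relationship (concrete values chosen to match the first test's [0, 0, 391, 511]):

    N, C, H, W = 1, 1, 392, 512  # assumed concrete sizes
    end = [N - 1, C - 1, H - 1, W - 1]  # gives [0, 0, 391, 511]
    assert [e + 1 for e in end] == [N, C, H, W]
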
tt_eager/tt_dnn/op_library/auto_format.cpp (2 changes: 0 additions & 2 deletions)
@@ -153,7 +153,6 @@ Tensor AutoFormat::format_output_tensor(
} else if (formatted_output.get_layout() == Layout::TILE && AutoFormat::legal_rm_shape(shape)) {
formatted_output = untilize_with_unpadding(
formatted_output,
- {0, 0, 0, 0},
{shape[0] - 1, shape[1] - 1, shape[2] - 1, shape[3] - 1},
mem_config);
return formatted_output;
@@ -163,7 +162,6 @@
AutoFormat::legal_rm_shape(shape)) {
formatted_output = untilize_with_unpadding(
formatted_output,
- {0, 0, 0, 0},
{shape[0] - 1, shape[1] - 1, shape[2] - 1, shape[3] - 1},
mem_config);
return formatted_output;
@@ -53,9 +53,8 @@ void kernel_main() {

cb_reserve_back(cb_id_in0, num_tiles_per_row * has_rows);
uint32_t l1_write_addr = get_write_ptr(cb_id_in0);
- uint32_t curr_stick_id = base_stick_id;
for (uint32_t k = 0; k < num_rows; k++) {
- uint64_t src_noc_addr = get_noc_addr(curr_stick_id + k, s);
+ uint64_t src_noc_addr = get_noc_addr(base_stick_id + k, s);

// Read from DRAM to tmp buffer
noc_async_read(src_noc_addr, l1_write_addr, unpadded_X_size);
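
The reader-kernel hunk drops a redundant curr_stick_id copy: each row's source stick is addressed directly as base_stick_id + k. A tiny Python model of that addressing (illustrative only, not the kernel API):

    def stick_ids(base_stick_id: int, num_rows: int) -> list[int]:
        # One stick (row) per loop iteration, addressed absolutely.
        return [base_stick_id + k for k in range(num_rows)]

    assert stick_ids(base_stick_id=8, num_rows=4) == [8, 9, 10, 11]
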
tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/padding.h (113 changes: 0 additions & 113 deletions)

This file was deleted.

(Diffs for the remaining changed files were not loaded in this view.)
