Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#4625: Multicore runs for untilize with unpadding on interleaved tensors #8622

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions models/demos/resnet/tt/metalResnetBlock50.py
Original file line number Diff line number Diff line change
Expand Up @@ -2217,7 +2217,6 @@ def forward(self, x: tt_lib.tensor) -> tt_lib.tensor:
unpadded_shape = x.shape_without_padding()
x = tt_lib.tensor.untilize_with_unpadding(
x,
(0, 0, 0, 0),
(unpadded_shape[0] - 1, unpadded_shape[1] - 1, unpadded_shape[2] - 1, unpadded_shape[3] - 1),
self.memory_config,
)
Expand Down Expand Up @@ -2274,7 +2273,7 @@ def forward(self, x: tt_lib.tensor) -> tt_lib.tensor:
]
if self.sharded:
x = tt_lib.tensor.untilize_with_unpadding(
x, (0, 0, 0, 0), unpadded_shape_end, output_mem_config=self.width_sharded_memory_config
x, unpadded_shape_end, output_mem_config=self.width_sharded_memory_config
)
else:
x = tt_lib.tensor.untilize(x, self.memory_config, use_multicore=True)
Expand Down Expand Up @@ -2313,7 +2312,6 @@ def forward(self, x: tt_lib.tensor) -> tt_lib.tensor:
desired_shape[-1] = 1000
x = tt_lib.tensor.untilize_with_unpadding(
x,
[0, 0, 0, 0],
(desired_shape[0] - 1, desired_shape[1] - 1, desired_shape[2] - 1, desired_shape[3] - 1),
self.memory_config,
)
Expand Down
4 changes: 1 addition & 3 deletions models/experimental/resnet/tt/ttnn_functional_resnet50.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,6 @@ def __call__(self, input_tensor) -> ttnn.Tensor:
unpadded_shape = x.shape_without_padding()
x = ttnn.experimental.tensor.untilize_with_unpadding(
x,
(0, 0, 0, 0),
(unpadded_shape[0] - 1, unpadded_shape[1] - 1, unpadded_shape[2] - 1, unpadded_shape[3] - 1),
ttnn.L1_MEMORY_CONFIG,
)
Expand Down Expand Up @@ -735,7 +734,7 @@ def __call__(self, input_tensor) -> ttnn.Tensor:
x.get_legacy_shape()[3] - 1,
]
x = ttnn.experimental.tensor.untilize_with_unpadding(
x, (0, 0, 0, 0), unpadded_shape_end, output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG
x, unpadded_shape_end, output_mem_config=ttnn.L1_WIDTH_SHARDED_MEMORY_CONFIG
)

x = ttnn.reshape(
Expand Down Expand Up @@ -763,7 +762,6 @@ def __call__(self, input_tensor) -> ttnn.Tensor:
desired_shape[-1] = 1000
x = ttnn.experimental.tensor.untilize_with_unpadding(
x,
[0, 0, 0, 0],
(desired_shape[0] - 1, desired_shape[1] - 1, desired_shape[2] - 1, desired_shape[3] - 1),
ttnn.L1_MEMORY_CONFIG,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def run_untilize_with_unpadding_tests(
in_mem_config,
out_mem_config,
data_seed,
output_tensor_start,
output_tensor_end,
device,
):
Expand All @@ -37,13 +36,10 @@ def run_untilize_with_unpadding_tests(
x = gen_rand(size=input_shape, low=-100, high=100).to(torch.bfloat16)
# compute ref value
x_ref = x.detach().clone()
ref_value = pytorch_ops.untilize_with_unpadding(
x_ref, output_tensor_start=output_tensor_start, output_tensor_end=output_tensor_end
)
ref_value = pytorch_ops.untilize_with_unpadding(x_ref, output_tensor_end=output_tensor_end)

tt_result = tt_untilize_with_unpadding(
x=x,
output_tensor_start=output_tensor_start,
output_tensor_end=output_tensor_end,
device=device,
dtype=[dtype],
Expand All @@ -68,14 +64,13 @@ def run_untilize_with_unpadding_tests(
"SYSTEM_MEMORY",
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
5263366,
[0, 0, 0, 0],
[10, 9, 4, 1],
),
]


@pytest.mark.parametrize(
"input_shape, dtype, dlayout, in_mem_config, out_mem_config, data_seed, output_tensor_start, output_tensor_end",
"input_shape, dtype, dlayout, in_mem_config, out_mem_config, data_seed, output_tensor_end",
(test_sweep_args),
)
def test_untilize_with_unpadding_test(
Expand All @@ -85,7 +80,6 @@ def test_untilize_with_unpadding_test(
in_mem_config,
out_mem_config,
data_seed,
output_tensor_start,
output_tensor_end,
device,
):
Expand All @@ -97,7 +91,6 @@ def test_untilize_with_unpadding_test(
in_mem_config,
out_mem_config,
data_seed,
output_tensor_start,
output_tensor_end,
device,
)
Original file line number Diff line number Diff line change
Expand Up @@ -823,14 +823,13 @@ def gen_untilize_with_unpadding_args(
input_shapes, dtypes, layouts, mem_configs, do_sanitize_args=do_sanitize_args
):
if input_info is not None:
output_tensor_start = [0, 0, 0, 0]
output_tensor_end = [random.randrange(output_tensor_start[i], input_shapes[0][i], 1) for i in range(4)]
output_tensor_end = [random.randrange(0, input_shapes[0][i], 1) for i in range(4)]
if output_tensor_end[-1] % 2 == 0:
output_tensor_end[-1] += 1
input_info.update(
{
"output_tensor_start": output_tensor_start,
"output_tensor_end": output_tensor_end,
"use_multicore": True,
}
)
yield input_info
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@


from tests.tt_eager.python_api_testing.sweep_tests import comparison_funcs, generation_funcs
from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import run_single_pytorch_test
from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import (
run_single_pytorch_test,
)
import tt_lib as ttl


Expand Down Expand Up @@ -38,7 +40,6 @@ def create_grid(x, y):
"output_mem_config": ttl.tensor.MemoryConfig(
ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM
),
"output_tensor_start": [0, 0, 0, 0],
"output_tensor_end": [0, 0, 119, 7299],
},
)
Expand Down
10 changes: 5 additions & 5 deletions tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1200,13 +1200,13 @@ def tilize_with_val_padding(x, output_tensor_shape, pad_value, *args, **kwargs):
return tilized


def untilize_with_unpadding(x, output_tensor_start, output_tensor_end, *args, **kwargs):
def untilize_with_unpadding(x, output_tensor_end, *args, **kwargs):
untilized = untilize_util(x)
unpad = untilized[
output_tensor_start[0] : output_tensor_end[0] + 1,
output_tensor_start[1] : output_tensor_end[1] + 1,
output_tensor_start[2] : output_tensor_end[2] + 1,
output_tensor_start[3] : output_tensor_end[3] + 1,
: output_tensor_end[0] + 1,
: output_tensor_end[1] + 1,
: output_tensor_end[2] + 1,
: output_tensor_end[3] + 1,
]
return unpad

Expand Down
5 changes: 1 addition & 4 deletions tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -2063,7 +2063,6 @@ def untilize_with_unpadding(
layout,
input_mem_config,
output_mem_config,
output_tensor_start,
output_tensor_end,
**kwargs,
):
Expand All @@ -2084,9 +2083,7 @@ def untilize_with_unpadding(
input_mem_config[0],
)

t1 = ttl.tensor.untilize_with_unpadding(
t0, output_tensor_start, output_tensor_end, output_mem_config=output_mem_config
)
t1 = ttl.tensor.untilize_with_unpadding(t0, output_tensor_end, output_mem_config=output_mem_config)

return tt2torch_tensor(t1)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1655,7 +1655,6 @@ def test_block_sharded_untilize_with_unpadding(in_sharded, out_sharded, dtype, d

yt = ttl.tensor.untilize_with_unpadding(
xt,
ttl.tensor.Shape([0, 0, 0, 0]),
ttl.tensor.Shape([0, 0, 391, 511]),
output_mem_config=out_mem_config,
)
Expand Down Expand Up @@ -1744,7 +1743,6 @@ def test_width_sharded_untilize_with_unpadding(

yt = ttl.tensor.untilize_with_unpadding(
xt,
ttl.tensor.Shape([0, 0, 0, 0]),
ttl.tensor.Shape([N - 1, C - 1, output_H - 1, W - 1]),
output_mem_config=out_mem_config,
)
Expand Down
2 changes: 0 additions & 2 deletions tt_eager/tt_dnn/op_library/auto_format.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ Tensor AutoFormat::format_output_tensor(
} else if (formatted_output.get_layout() == Layout::TILE && AutoFormat::legal_rm_shape(shape)) {
formatted_output = untilize_with_unpadding(
formatted_output,
{0, 0, 0, 0},
{shape[0] - 1, shape[1] - 1, shape[2] - 1, shape[3] - 1},
mem_config);
return formatted_output;
Expand All @@ -163,7 +162,6 @@ Tensor AutoFormat::format_output_tensor(
AutoFormat::legal_rm_shape(shape)) {
formatted_output = untilize_with_unpadding(
formatted_output,
{0, 0, 0, 0},
{shape[0] - 1, shape[1] - 1, shape[2] - 1, shape[3] - 1},
mem_config);
return formatted_output;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ void kernel_main() {

cb_reserve_back(cb_id_in0, num_tiles_per_row * has_rows);
uint32_t l1_write_addr = get_write_ptr(cb_id_in0);
uint32_t curr_stick_id = base_stick_id;
for (uint32_t k = 0; k < num_rows; k++) {
uint64_t src_noc_addr = get_noc_addr(curr_stick_id + k, s);
uint64_t src_noc_addr = get_noc_addr(base_stick_id + k, s);

// Read from DRAM to tmp buffer
noc_async_read(src_noc_addr, l1_write_addr, unpadded_X_size);
Expand Down
113 changes: 0 additions & 113 deletions tt_eager/tt_dnn/op_library/tilize/tilize_multi_core/padding.h

This file was deleted.

Loading
Loading