From 11b8754e448a868ab1ef9ee45ffb49ccb44ae0b1 Mon Sep 17 00:00:00 2001
From: Pavle Josipovic
Date: Sun, 15 Dec 2024 14:35:52 +0000
Subject: [PATCH] revert 1

---
 .../operations/conv/conv2d/conv2d_utils.cpp | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp
index 08111cc07d9b..0eb90a4df4c4 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp
@@ -698,7 +698,6 @@ std::tuple shard_or_re
 
         // In case we are in auto sharded codepath and convolution maps to matmul
         // Skip sharding of the input tensor and run the matmul out of interleaved tensor.
-        bool auto_shard_mm = auto_shard && is_mm_conv;
         if (input_tensor_on_device) {
             if (is_mm_conv && input_tensor.layout() == Layout::ROW_MAJOR &&
                 parallel_config.shard_scheme != TensorMemoryLayout::HEIGHT_SHARDED) {
@@ -706,17 +705,24 @@ std::tuple shard_or_re
                 input_tensor =
                     ttnn::to_layout(input_tensor, Layout::TILE, std::nullopt, std::nullopt, input_tensor.device());
             }
-            if (!auto_shard_mm) {
-                auto resharded_input_tensor = ttnn::to_memory_config(
-                    input_tensor, input_tensor_sharded_memory_config, std::nullopt);
-                if (conv_config.deallocate_activation) {
-                    input_tensor.deallocate();
-                    resharded_input_tensor = ttnn::operations::core::reallocate(resharded_input_tensor, resharded_input_tensor.memory_config());
-                }
-                input_tensor = resharded_input_tensor;
+            auto resharded_input_tensor = ttnn::to_memory_config(
+                input_tensor, input_tensor_sharded_memory_config, std::nullopt);
+            if (conv_config.deallocate_activation) {
+                input_tensor.deallocate();
+                resharded_input_tensor = ttnn::operations::core::reallocate(resharded_input_tensor, resharded_input_tensor.memory_config());
             }
+            input_tensor = resharded_input_tensor;
         } else {
-            input_tensor = ttnn::to_device(input_tensor, device, (auto_shard_mm ? ttnn::DRAM_MEMORY_CONFIG : input_tensor_sharded_memory_config));
+            if (is_mm_conv && input_tensor.layout() == Layout::ROW_MAJOR &&
+                parallel_config.shard_scheme != TensorMemoryLayout::HEIGHT_SHARDED) {
+                // Workaround #13979 ttnn::tilize doesn't support BLOCK_SHARDED layout
+                input_tensor = ttnn::to_device(input_tensor, device, std::nullopt);
+                input_tensor =
+                    ttnn::to_layout(input_tensor, Layout::TILE, std::nullopt, std::nullopt, input_tensor.device());
+                input_tensor = ttnn::to_memory_config(input_tensor, input_tensor_sharded_memory_config, std::nullopt);
+            } else {
+                input_tensor = ttnn::to_device(input_tensor, device, input_tensor_sharded_memory_config);
+            }
         }
     }
     return {input_tensor, parallel_config, output_parallel_config, needs_shard_or_reshard, use_non_tile_height};