From 29803c9f428be6ff483af4b44b56d5e599cdaa05 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Fri, 20 Dec 2024 06:43:57 +0000 Subject: [PATCH 1/9] #0: Fix failing test case for width sharded non-32 multiple output width --- .../ttnn/operations/conv/conv2d/conv2d.cpp | 1 + ...onv2d_op_width_sharded_program_factory.cpp | 2 +- .../conv/conv2d/prepare_conv2d_weights.cpp | 42 +++++++++---------- .../conv/conv2d/prepare_conv2d_weights.hpp | 5 ++- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp index 7f0e355b594..ac714aeccbf 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp @@ -158,6 +158,7 @@ Result conv2d( opt_conv_op_block_config.act_block_w_ntiles, opt_conv_op_block_config.out_subblock_w_ntiles, parallel_config, + output_parallel_config, device, groups, opt_conv_op_block_config.act_block_h_ntiles, diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_width_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_width_sharded_program_factory.cpp index bf8e12c6aa8..25070d665a3 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_width_sharded_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_width_sharded_program_factory.cpp @@ -368,7 +368,7 @@ tt::tt_metal::operation::ProgramWithCallbacks multi_core_optimized_conv_width_sh uint32_t num_groups = num_blocks_act_h * num_blocks_act_w * num_blocks_weight_w; // writer of conv op partially removes padding on the width // it removes the padding done for block width but it doesn't remove padding done for tiled width - uint32_t output_channels_padded_to_tile_width = round_up(output_channels, input_num_cores * TILE_WIDTH); + uint32_t output_channels_padded_to_tile_width = round_up(output_channels, output_num_cores * TILE_WIDTH); TT_FATAL( output_channels_padded_to_tile_width <= weight_matrix_width, "output_channels_padded_to_tile_width {} should be less than or equal to weight_matrix_width {}", diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp index a3f39ce5c77..eca2b2a6c09 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp @@ -664,8 +664,9 @@ std::pair> prepare_conv_weights_biases DataType weights_bias_dtype, uint32_t weight_block_h_ntiles, uint32_t weight_block_w_ntiles, - const ParallelConfig& parallel_config, - T* device, + const ParallelConfig& input_parallel_config, + const ParallelConfig& output_parallel_config, + T * device, uint32_t groups, uint32_t act_block_h_ntiles, uint32_t input_width, @@ -705,9 +706,11 @@ std::pair> prepare_conv_weights_biases uint32_t window_h = weights_shape[2]; uint32_t window_w = weights_shape[3]; - uint32_t num_cores_channels = get_num_cores_channels_from_parallel_config(parallel_config); - uint32_t out_channels_padded = tt::round_up(out_channels, num_cores_channels * tt::constants::TILE_WIDTH); - uint32_t in_channels_padded = tt::round_up(in_channels, num_cores_channels * input_channels_alignment); + uint32_t input_num_cores_channels = get_num_cores_channels_from_parallel_config(input_parallel_config); + uint32_t output_num_cores_channels = get_num_cores_channels_from_parallel_config(output_parallel_config); + + uint32_t out_channels_padded = tt::round_up(out_channels, output_num_cores_channels * tt::constants::TILE_WIDTH); + uint32_t in_channels_padded = tt::round_up(in_channels, input_num_cores_channels * input_channels_alignment); uint32_t out_channel_padding = out_channels_padded - out_channels; tt::tt_metal::LegacyShape weights_channels_padded_shape = tt::tt_metal::LegacyShape( @@ -733,12 +736,12 @@ std::pair> prepare_conv_weights_biases ttnn::pad(weight_tensor_, weights_channels_padded_shape.to_array_4D(), tt::tt_metal::Array4D({0, 0, 0, 0}), 0); // for conv op, pad the weights to block shape - if (parallel_config.shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED) { - weight_tensor_ = convert_conv_weight_tensor_to_special_padding_tiled_layout( + if (input_parallel_config.shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED) { + weight_tensor_ = tt::tt_metal::convert_conv_weight_tensor_to_special_padding_tiled_layout( weight_tensor_, weight_block_h_ntiles, weight_block_w_ntiles, weights_bias_dtype); - } else if (parallel_config.shard_scheme == TensorMemoryLayout::BLOCK_SHARDED) { - weight_tensor_ = convert_conv_weight_tensor_to_tiled_layout_block_sharded( - weight_tensor_, num_cores_channels, weights_bias_dtype); + } else if(input_parallel_config.shard_scheme == TensorMemoryLayout::BLOCK_SHARDED) { + weight_tensor_ = tt::tt_metal::convert_conv_weight_tensor_to_tiled_layout_block_sharded( + weight_tensor_, input_num_cores_channels, weights_bias_dtype); } else { weight_tensor_ = convert_conv_weight_tensor_to_tiled_layout( weight_tensor_, weight_block_h_ntiles, weight_block_w_ntiles, weights_bias_dtype); @@ -764,16 +767,8 @@ std::pair> prepare_conv_weights_biases if (bias_tensor.has_value()) { bias_tensor_ = bias_tensor.value(); bool is_bias_tensor_is_on_device = ttnn::is_tensor_on_device_or_multidevice(bias_tensor_); - if (!is_bias_tensor_is_on_device) { - bias_tensor_ = conv_bias_layout_convert( - bias_tensor_, - weights_bias_dtype, - weight_block_h_ntiles, - weight_block_w_ntiles, - parallel_config, - device, - out_channels, - is_non_tile_mul_width); + if(!is_bias_tensor_is_on_device) { + bias_tensor_ = conv_bias_layout_convert(bias_tensor_, weights_bias_dtype, weight_block_h_ntiles, weight_block_w_ntiles, input_parallel_config, device, out_channels, is_non_tile_mul_width); bias_tensor_ = ttnn::operations::core::to_device(bias_tensor_, device, std::nullopt); } } @@ -859,6 +854,7 @@ ttnn::Tensor prepare_conv_weights( opt_conv_op_block_config.act_block_w_ntiles, opt_conv_op_block_config.out_subblock_w_ntiles, parallel_config, + parallel_config, device, groups, opt_conv_op_block_config.act_block_h_ntiles, @@ -1027,7 +1023,8 @@ template std::pair> prepare_conv_weigh DataType weights_bias_dtype, uint32_t weight_block_h_ntiles, uint32_t weight_block_w_ntiles, - const ParallelConfig& parallel_config, + const ParallelConfig& input_parallel_config, + const ParallelConfig& output_parallel_config, Device* device, uint32_t groups, uint32_t act_block_h_ntiles, @@ -1043,7 +1040,8 @@ prepare_conv_weights_biases_and_move_to_device( DataType weights_bias_dtype, uint32_t weight_block_h_ntiles, uint32_t weight_block_w_ntiles, - const ParallelConfig& parallel_config, + const ParallelConfig& input_parallel_config, + const ParallelConfig& output_parallel_config, MeshDevice* device, uint32_t groups, uint32_t act_block_h_ntiles, diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp index 2c4b7f8eab1..a7c724a45d8 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp @@ -111,8 +111,9 @@ std::pair> prepare_conv_weights_biases DataType weights_bias_dtype, uint32_t weight_block_h_ntiles, uint32_t weight_block_w_ntiles, - const sliding_window::ParallelConfig& parallel_config, - T* device, + const sliding_window::ParallelConfig& input_parallel_config, + const sliding_window::ParallelConfig& output_parallel_config, + T * device, uint32_t groups, uint32_t act_block_h_ntiles, uint32_t input_width, From 16142d955af93774b36321fb5d00a4a0256a0247 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Fri, 20 Dec 2024 18:03:24 +0000 Subject: [PATCH 2/9] #0: Clean up --- .../sweeps/conv2d/short/conv2d_short_sweep.py | 1 - ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp | 2 +- ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp | 6 ++++++ .../ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp | 5 ++++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py b/tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py index dfb7d5e04b9..56d9790840f 100644 --- a/tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py +++ b/tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py @@ -1623,7 +1623,6 @@ def test_conv2d_localrun(device, input_spec): [1, 1056, 1056, 96, 96, 3, 3, 2, 2, 1, 1, 4, False, 1], # 127 [1, 528, 528, 192, 192, 3, 3, 2, 2, 1, 1, 2, False, 1], # 220 [1, 2904, 2904, 48, 48, 3, 3, 2, 2, 1, 1, 11, False, 1], # 294 - [1, 3024, 1232, 14, 14, 1, 1, 2, 2, 0, 0, 1, False, 1], # 1421 [1, 819, 256, 100, 136, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1443 [1, 819, 256, 50, 68, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1447 [1, 1024, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, True, 1], # 1458 diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp index ac94433d535..4cd6fe1c57f 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp @@ -160,7 +160,7 @@ ParallelConfig determine_parallel_config( return pconfig; } -static ParallelConfig determine_output_parallel_config( +ParallelConfig determine_output_parallel_config( const ParallelConfig& input_parallel_config, const CoreCoord& compute_grid_size, uint32_t out_channels, diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp index 9a5758872c2..f190345f506 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp @@ -171,6 +171,12 @@ shard_or_reshard_tensor_if_required( bool auto_shard, bool is_non_tile_mul_width = false); +sliding_window::ParallelConfig determine_output_parallel_config( + const sliding_window::ParallelConfig& input_parallel_config, + const CoreCoord& compute_grid_size, + uint32_t out_channels, + bool is_mm_conv); + std::ostream& operator<<(std::ostream& os, const Conv2dConfig& config); } // namespace operations::conv diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp index eca2b2a6c09..2c2f8b0c4b2 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp @@ -842,6 +842,9 @@ ttnn::Tensor prepare_conv_weights( shard_orientation, !use_non_tile_height); + ParallelConfig output_parallel_config = + determine_output_parallel_config(parallel_config, device->compute_with_storage_grid_size(), out_channels, mm_conv); + bool is_non_tile_mul_width = check_non_tile_mul_width(device, conv_config, in_channels); std::optional bias_tensor = std::nullopt; ttnn::Tensor weight_tensor_on_device = weight_tensor; @@ -854,7 +857,7 @@ ttnn::Tensor prepare_conv_weights( opt_conv_op_block_config.act_block_w_ntiles, opt_conv_op_block_config.out_subblock_w_ntiles, parallel_config, - parallel_config, + output_parallel_config, device, groups, opt_conv_op_block_config.act_block_h_ntiles, From 2f7407e6dbab5efec2429e1d82437609f5c1740c Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 21 Dec 2024 08:14:39 +0000 Subject: [PATCH 3/9] #0: Fix failing UT --- tests/ttnn/unit_tests/operations/test_new_conv2d.py | 4 +--- ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp | 6 +++--- .../operations/conv/conv2d/prepare_conv2d_weights.cpp | 11 ++++++----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/ttnn/unit_tests/operations/test_new_conv2d.py b/tests/ttnn/unit_tests/operations/test_new_conv2d.py index 4d790730c16..0beea0f771e 100644 --- a/tests/ttnn/unit_tests/operations/test_new_conv2d.py +++ b/tests/ttnn/unit_tests/operations/test_new_conv2d.py @@ -530,7 +530,6 @@ def test_conv_features_multi_device( @pytest.mark.parametrize( "batch_size, output_channels, input_channels, input_height, input_width, filter_height, filter_width, pad_h, pad_w, act_block_w_div", ( - (2, 128, 128, 9, 9, 3, 3, 0, 0, 1), (2, 128, 256, 9, 9, 3, 3, 1, 1, 1), (2, 576, 576, 9, 9, 3, 3, 0, 0, 1), (2, 960, 960, 5, 5, 3, 3, 0, 0, 1), @@ -538,12 +537,11 @@ def test_conv_features_multi_device( (2, 512, 2048, 17, 17, 3, 3, 1, 1, 1), (2, 768, 768, 17, 17, 3, 3, 0, 0, 1), (2, 1280, 2560, 15, 15, 3, 3, 1, 1, 2), - (2, 1280, 2560, 15, 15, 3, 3, 0, 0, 2), (2, 1280, 1280, 17, 17, 3, 3, 1, 1, 1), + [1, 3024, 1232, 14, 14, 1, 1, 0, 0, 1], (2, 768, 32, 9, 9, 3, 3, 1, 1, 1), (2, 64, 128, 9, 9, 3, 3, 1, 1, 1), (2, 32, 128, 9, 9, 3, 3, 1, 1, 1), - (1, 256, 256, 7, 7, 3, 3, 1, 1, 1), ), ) @pytest.mark.parametrize( diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp index ac714aeccbf..90e2d238dd7 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp @@ -60,7 +60,6 @@ Result conv2d( const uint32_t output_width = ((input_width - kernel_size[1] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[1]) / stride[1]) + 1; - Conv2dConfig conv_config = conv_config_.value_or(Conv2dConfig()); const auto compute_grid_size = device->compute_with_storage_grid_size(); bool auto_shard = false; @@ -195,8 +194,9 @@ Result conv2d( if (bypass_halo) { if (input_tensor_post_tm.layout() == Layout::TILE) { - input_tensor_post_tm = - ttnn::to_layout(input_tensor_post_tm, Layout::ROW_MAJOR, std::nullopt, std::nullopt, device); + input_tensor_post_tm = ttnn::reshape(input_tensor_post_tm, input_tensor_post_tm.get_padded_shape()); + input_tensor_post_tm = ttnn::to_layout( + input_tensor_post_tm, Layout::ROW_MAJOR, std::nullopt, std::nullopt, device); } } else { Tensor halo_output = ttnn::halo( diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp index 2c2f8b0c4b2..f7ddce08192 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp @@ -538,9 +538,7 @@ ttnn::Tensor conv_bias_layout_convert( validate_bias_tensor(bias_tensor_); if (!is_non_tile_mul_width) { auto bias_shape = bias_tensor_.get_shape(); - TT_FATAL( - bias_shape[3] == out_channels && bias_shape[0] == 1 && bias_shape[1] == 1 && bias_shape[2] == 1, - "bias shape is not correct"); + TT_FATAL(bias_shape[0] == 1 && bias_shape[1] == 1 && bias_shape[2] == 1, "bias shape is not correct"); tt::tt_metal::LegacyShape bias_channels_padded_shape = tt::tt_metal::LegacyShape( std::array({1, 1, 32, round_up(out_channels, weight_block_w_ntiles * 32)})); bias_tensor_ = @@ -768,7 +766,7 @@ std::pair> prepare_conv_weights_biases bias_tensor_ = bias_tensor.value(); bool is_bias_tensor_is_on_device = ttnn::is_tensor_on_device_or_multidevice(bias_tensor_); if(!is_bias_tensor_is_on_device) { - bias_tensor_ = conv_bias_layout_convert(bias_tensor_, weights_bias_dtype, weight_block_h_ntiles, weight_block_w_ntiles, input_parallel_config, device, out_channels, is_non_tile_mul_width); + bias_tensor_ = conv_bias_layout_convert(bias_tensor_, weights_bias_dtype, weight_block_h_ntiles, weight_block_w_ntiles, output_parallel_config, device, out_channels_padded, is_non_tile_mul_width); bias_tensor_ = ttnn::operations::core::to_device(bias_tensor_, device, std::nullopt); } } @@ -935,6 +933,9 @@ ttnn::Tensor prepare_conv_bias( shard_orientation, !use_non_tile_height); + ParallelConfig output_parallel_config = + determine_output_parallel_config(parallel_config, device->compute_with_storage_grid_size(), out_channels, mm_conv); + bool is_non_tile_mul_width = check_non_tile_mul_width(device, conv_config, in_channels); ttnn::Tensor bias_tensor_ = bias_tensor; bias_tensor_ = conv_bias_layout_convert( @@ -942,7 +943,7 @@ ttnn::Tensor prepare_conv_bias( conv_config.weights_dtype, opt_conv_op_block_config.act_block_h_ntiles, weight_block_w_ntiles, - parallel_config, + output_parallel_config, device, out_channels, is_non_tile_mul_width); From 55a25d5af57dce9dedcbe877d0beb19919cb3367 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sun, 22 Dec 2024 13:32:34 +0000 Subject: [PATCH 4/9] #0: Fix mm conv --- ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp index 90e2d238dd7..3869649b64f 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp @@ -54,9 +54,9 @@ Result conv2d( const std::optional& conv_config_, const std::optional& compute_config_, const std::optional& memory_config) { - const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups); - const uint32_t output_height = - ((input_height - kernel_size[0] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; + Conv2dConfig conv_config = conv_config_.value_or(Conv2dConfig()); + const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups) && !(conv_config.shard_layout.has_value() && conv_config.shard_layout.value() == TensorMemoryLayout::WIDTH_SHARDED); + const uint32_t output_height = ((input_height - kernel_size[0] - ((kernel_size[0] - 1 ) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; const uint32_t output_width = ((input_width - kernel_size[1] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[1]) / stride[1]) + 1; From 24712c0064bef7c550e7a4ba5076ae3ea0452f0b Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Mon, 23 Dec 2024 14:03:12 +0000 Subject: [PATCH 5/9] #0: Clean up --- ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp | 2 +- .../ttnn/operations/conv/conv2d/conv2d_utils.cpp | 6 ++++-- .../ttnn/operations/conv/conv2d/conv2d_utils.hpp | 15 ++++++++------- .../conv/conv2d/prepare_conv2d_weights.cpp | 14 +++++++------- .../conv/conv_transpose2d/conv_transpose2d.cpp | 3 +-- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp index 3869649b64f..183c962f277 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp @@ -55,7 +55,7 @@ Result conv2d( const std::optional& compute_config_, const std::optional& memory_config) { Conv2dConfig conv_config = conv_config_.value_or(Conv2dConfig()); - const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups) && !(conv_config.shard_layout.has_value() && conv_config.shard_layout.value() == TensorMemoryLayout::WIDTH_SHARDED); + const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups, conv_config); const uint32_t output_height = ((input_height - kernel_size[0] - ((kernel_size[0] - 1 ) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; const uint32_t output_width = ((input_width - kernel_size[1] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[1]) / stride[1]) + 1; diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp index 4cd6fe1c57f..054177ac925 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp @@ -371,9 +371,11 @@ bool use_matmul_for_1x1_conv( const std::array& stride, const std::array& padding, const std::array& dilation, - uint32_t groups) { + uint32_t groups, + const Conv2dConfig& conv_config) { + bool is_width_sharded = (conv_config.shard_layout.has_value() && conv_config.shard_layout.value() == TensorMemoryLayout::WIDTH_SHARDED); return kernel_size[0] == 1 && kernel_size[1] == 1 && stride[0] == stride[1] && stride[0] == 1 && padding[0] == 0 && - padding[1] == 0 && dilation[0] == 1 && dilation[1] == 1 && groups == 1; + padding[1] == 0 && dilation[0] == 1 && dilation[1] == 1 && groups == 1 && (not is_width_sharded); } // Implements a heuristic for selecting shard layout based on how many tenix cores are available diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp index f190345f506..480bcb29a91 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp @@ -95,7 +95,8 @@ bool use_matmul_for_1x1_conv( const std::array& stride, const std::array& padding, const std::array& dilation, - uint32_t groups); + uint32_t groups, + const Conv2dConfig& conv_config); sliding_window::ParallelConfig determine_parallel_config( const TensorMemoryLayout shard_layout, @@ -109,6 +110,12 @@ sliding_window::ParallelConfig determine_parallel_config( bool enable_channels_padding, bool is_out_tiled = true); +sliding_window::ParallelConfig determine_output_parallel_config( + const sliding_window::ParallelConfig& input_parallel_config, + const CoreCoord& compute_grid_size, + uint32_t out_channels, + bool is_mm_conv); + uint32_t get_num_cores_nhw_from_parallel_config(const sliding_window::ParallelConfig& pconfig); uint32_t get_num_cores_channels_from_parallel_config(const sliding_window::ParallelConfig& pconfig); @@ -171,12 +178,6 @@ shard_or_reshard_tensor_if_required( bool auto_shard, bool is_non_tile_mul_width = false); -sliding_window::ParallelConfig determine_output_parallel_config( - const sliding_window::ParallelConfig& input_parallel_config, - const CoreCoord& compute_grid_size, - uint32_t out_channels, - bool is_mm_conv); - std::ostream& operator<<(std::ostream& os, const Conv2dConfig& config); } // namespace operations::conv diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp index f7ddce08192..e54e7234acf 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp @@ -799,9 +799,8 @@ ttnn::Tensor prepare_conv_weights( Conv2dConfig conv_config = conv_config_.value_or(Conv2dConfig()); DeviceComputeKernelConfig compute_config = compute_config_.value_or( init_device_compute_kernel_config(device->arch(), std::nullopt, MathFidelity::HiFi4, true, false, false)); - const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups); - const uint32_t output_height = - ((input_height - kernel_size[0] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; + const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups, conv_config); + const uint32_t output_height = ((input_height - kernel_size[0] - ((kernel_size[0] - 1 ) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; const uint32_t output_width = ((input_width - kernel_size[1] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[1]) / stride[1]) + 1; auto opt_conv_op_block_config = get_opt_block_config( @@ -887,15 +886,16 @@ ttnn::Tensor prepare_conv_bias( TT_FATAL( !ttnn::is_tensor_on_device_or_multidevice(bias_tensor), "Error: bias tensor must be on host for preparation."); - const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups); - const uint32_t output_height = - ((input_height - kernel_size[0] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; + Conv2dConfig conv_config = conv_config_.value_or(Conv2dConfig()); + + const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups, conv_config); + const uint32_t output_height = ((input_height - kernel_size[0] - ((kernel_size[0] - 1 ) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; const uint32_t output_width = ((input_width - kernel_size[1] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[1]) / stride[1]) + 1; - Conv2dConfig conv_config = conv_config_.value_or(Conv2dConfig()); DeviceComputeKernelConfig compute_config = compute_config_.value_or( init_device_compute_kernel_config(device->arch(), std::nullopt, MathFidelity::HiFi4, true, false, false)); + auto opt_conv_op_block_config = get_opt_block_config( mm_conv, in_channels, diff --git a/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp index a2700b26e55..368c4289ef7 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp @@ -167,8 +167,7 @@ Result conv_transpose2d( log_debug(LogOp, "Padding : ({},{}) ({},{})", input_pad_top, input_pad_bottom, input_pad_left, input_pad_right); - const bool mm_conv = use_matmul_for_1x1_conv( - kernel_size, {1, 1}, {input_pad_top + input_pad_bottom, input_pad_left + input_pad_right}, dilation, groups); + const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, {1, 1}, {input_pad_top + input_pad_bottom, input_pad_left + input_pad_right}, dilation, groups, conv_config); const auto compute_grid_size = device->compute_with_storage_grid_size(); From 5553f2673f5a76983a2d345cf9cbd4f5382a8e27 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Wed, 25 Dec 2024 05:50:41 +0000 Subject: [PATCH 6/9] #0: Check Bias shape before padding --- .../cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp index e54e7234acf..fb47460e6fd 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp @@ -766,6 +766,7 @@ std::pair> prepare_conv_weights_biases bias_tensor_ = bias_tensor.value(); bool is_bias_tensor_is_on_device = ttnn::is_tensor_on_device_or_multidevice(bias_tensor_); if(!is_bias_tensor_is_on_device) { + TT_FATAL(bias_tensor_.shape()[3]==out_channels, "Bias must have the same length as output channels"); bias_tensor_ = conv_bias_layout_convert(bias_tensor_, weights_bias_dtype, weight_block_h_ntiles, weight_block_w_ntiles, output_parallel_config, device, out_channels_padded, is_non_tile_mul_width); bias_tensor_ = ttnn::operations::core::to_device(bias_tensor_, device, std::nullopt); } @@ -938,6 +939,8 @@ ttnn::Tensor prepare_conv_bias( bool is_non_tile_mul_width = check_non_tile_mul_width(device, conv_config, in_channels); ttnn::Tensor bias_tensor_ = bias_tensor; + TT_FATAL(bias_tensor_.shape()[3]==out_channels, "Bias must have the same length as output channels"); + bias_tensor_ = conv_bias_layout_convert( bias_tensor_, conv_config.weights_dtype, From 66c41198d2b99a6b51b78a67bb13a66bbc52284c Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 28 Dec 2024 16:04:30 +0530 Subject: [PATCH 7/9] #0: Add comment on why reshape was used. --- ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp index 183c962f277..f3cccc8a2d5 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp @@ -194,6 +194,7 @@ Result conv2d( if (bypass_halo) { if (input_tensor_post_tm.layout() == Layout::TILE) { + // Reshape is used as a workaround to an issue in to_layout mentioned here : https://github.com/tenstorrent/tt-metal/issues/16330 input_tensor_post_tm = ttnn::reshape(input_tensor_post_tm, input_tensor_post_tm.get_padded_shape()); input_tensor_post_tm = ttnn::to_layout( input_tensor_post_tm, Layout::ROW_MAJOR, std::nullopt, std::nullopt, device); From 963f5bc7360d1c09b9528487479f4b8bd2e3d473 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Mon, 6 Jan 2025 05:22:29 +0000 Subject: [PATCH 8/9] #0: Rebase fix --- .../ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp | 4 ++-- .../operations/conv/conv_transpose2d/conv_transpose2d.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp index fb47460e6fd..284f7597249 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp @@ -735,10 +735,10 @@ std::pair> prepare_conv_weights_biases // for conv op, pad the weights to block shape if (input_parallel_config.shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED) { - weight_tensor_ = tt::tt_metal::convert_conv_weight_tensor_to_special_padding_tiled_layout( + weight_tensor_ = convert_conv_weight_tensor_to_special_padding_tiled_layout( weight_tensor_, weight_block_h_ntiles, weight_block_w_ntiles, weights_bias_dtype); } else if(input_parallel_config.shard_scheme == TensorMemoryLayout::BLOCK_SHARDED) { - weight_tensor_ = tt::tt_metal::convert_conv_weight_tensor_to_tiled_layout_block_sharded( + weight_tensor_ = convert_conv_weight_tensor_to_tiled_layout_block_sharded( weight_tensor_, input_num_cores_channels, weights_bias_dtype); } else { weight_tensor_ = convert_conv_weight_tensor_to_tiled_layout( diff --git a/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp index 368c4289ef7..eb1a151403c 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp @@ -267,7 +267,6 @@ Result conv_transpose2d( get_fp32_dest_acc_en(compute_config), conv_config.enable_split_reader); - // TODO: Flip the Weights bool weight_is_on_device = ttnn::is_tensor_on_device_or_multidevice(weight_tensor); ttnn::Tensor weight_tensor_on_device = weight_tensor; std::optional bias_tensor_on_device = bias_tensor; @@ -281,6 +280,7 @@ Result conv_transpose2d( opt_conv_op_block_config.act_block_w_ntiles, opt_conv_op_block_config.out_subblock_w_ntiles, parallel_config, + output_parallel_config, device, groups, opt_conv_op_block_config.act_block_h_ntiles, From 94a0cbc363feb06671ee95e2aedb2eefb6bba1a8 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Mon, 6 Jan 2025 05:25:44 +0000 Subject: [PATCH 9/9] #0: Refactor --- .../ttnn/operations/conv/conv2d/conv2d.cpp | 10 +++--- .../operations/conv/conv2d/conv2d_utils.cpp | 3 +- .../conv/conv2d/prepare_conv2d_weights.cpp | 34 ++++++++++++------- .../conv/conv2d/prepare_conv2d_weights.hpp | 2 +- .../conv_transpose2d/conv_transpose2d.cpp | 8 ++++- 5 files changed, 38 insertions(+), 19 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp index f3cccc8a2d5..6e5839ac82a 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp @@ -56,7 +56,8 @@ Result conv2d( const std::optional& memory_config) { Conv2dConfig conv_config = conv_config_.value_or(Conv2dConfig()); const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups, conv_config); - const uint32_t output_height = ((input_height - kernel_size[0] - ((kernel_size[0] - 1 ) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; + const uint32_t output_height = + ((input_height - kernel_size[0] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; const uint32_t output_width = ((input_width - kernel_size[1] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[1]) / stride[1]) + 1; @@ -194,10 +195,11 @@ Result conv2d( if (bypass_halo) { if (input_tensor_post_tm.layout() == Layout::TILE) { - // Reshape is used as a workaround to an issue in to_layout mentioned here : https://github.com/tenstorrent/tt-metal/issues/16330 + // Reshape is used as a workaround to an issue in to_layout mentioned here : + // https://github.com/tenstorrent/tt-metal/issues/16330 input_tensor_post_tm = ttnn::reshape(input_tensor_post_tm, input_tensor_post_tm.get_padded_shape()); - input_tensor_post_tm = ttnn::to_layout( - input_tensor_post_tm, Layout::ROW_MAJOR, std::nullopt, std::nullopt, device); + input_tensor_post_tm = + ttnn::to_layout(input_tensor_post_tm, Layout::ROW_MAJOR, std::nullopt, std::nullopt, device); } } else { Tensor halo_output = ttnn::halo( diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp index 054177ac925..5913f8f8cdd 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp @@ -373,7 +373,8 @@ bool use_matmul_for_1x1_conv( const std::array& dilation, uint32_t groups, const Conv2dConfig& conv_config) { - bool is_width_sharded = (conv_config.shard_layout.has_value() && conv_config.shard_layout.value() == TensorMemoryLayout::WIDTH_SHARDED); + bool is_width_sharded = + (conv_config.shard_layout.has_value() && conv_config.shard_layout.value() == TensorMemoryLayout::WIDTH_SHARDED); return kernel_size[0] == 1 && kernel_size[1] == 1 && stride[0] == stride[1] && stride[0] == 1 && padding[0] == 0 && padding[1] == 0 && dilation[0] == 1 && dilation[1] == 1 && groups == 1 && (not is_width_sharded); } diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp index 284f7597249..b659f7ea475 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.cpp @@ -664,7 +664,7 @@ std::pair> prepare_conv_weights_biases uint32_t weight_block_w_ntiles, const ParallelConfig& input_parallel_config, const ParallelConfig& output_parallel_config, - T * device, + T* device, uint32_t groups, uint32_t act_block_h_ntiles, uint32_t input_width, @@ -737,7 +737,7 @@ std::pair> prepare_conv_weights_biases if (input_parallel_config.shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED) { weight_tensor_ = convert_conv_weight_tensor_to_special_padding_tiled_layout( weight_tensor_, weight_block_h_ntiles, weight_block_w_ntiles, weights_bias_dtype); - } else if(input_parallel_config.shard_scheme == TensorMemoryLayout::BLOCK_SHARDED) { + } else if (input_parallel_config.shard_scheme == TensorMemoryLayout::BLOCK_SHARDED) { weight_tensor_ = convert_conv_weight_tensor_to_tiled_layout_block_sharded( weight_tensor_, input_num_cores_channels, weights_bias_dtype); } else { @@ -765,9 +765,17 @@ std::pair> prepare_conv_weights_biases if (bias_tensor.has_value()) { bias_tensor_ = bias_tensor.value(); bool is_bias_tensor_is_on_device = ttnn::is_tensor_on_device_or_multidevice(bias_tensor_); - if(!is_bias_tensor_is_on_device) { - TT_FATAL(bias_tensor_.shape()[3]==out_channels, "Bias must have the same length as output channels"); - bias_tensor_ = conv_bias_layout_convert(bias_tensor_, weights_bias_dtype, weight_block_h_ntiles, weight_block_w_ntiles, output_parallel_config, device, out_channels_padded, is_non_tile_mul_width); + if (!is_bias_tensor_is_on_device) { + TT_FATAL(bias_tensor_.shape()[3] == out_channels, "Bias must have the same length as output channels"); + bias_tensor_ = conv_bias_layout_convert( + bias_tensor_, + weights_bias_dtype, + weight_block_h_ntiles, + weight_block_w_ntiles, + output_parallel_config, + device, + out_channels_padded, + is_non_tile_mul_width); bias_tensor_ = ttnn::operations::core::to_device(bias_tensor_, device, std::nullopt); } } @@ -801,7 +809,8 @@ ttnn::Tensor prepare_conv_weights( DeviceComputeKernelConfig compute_config = compute_config_.value_or( init_device_compute_kernel_config(device->arch(), std::nullopt, MathFidelity::HiFi4, true, false, false)); const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups, conv_config); - const uint32_t output_height = ((input_height - kernel_size[0] - ((kernel_size[0] - 1 ) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; + const uint32_t output_height = + ((input_height - kernel_size[0] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; const uint32_t output_width = ((input_width - kernel_size[1] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[1]) / stride[1]) + 1; auto opt_conv_op_block_config = get_opt_block_config( @@ -840,8 +849,8 @@ ttnn::Tensor prepare_conv_weights( shard_orientation, !use_non_tile_height); - ParallelConfig output_parallel_config = - determine_output_parallel_config(parallel_config, device->compute_with_storage_grid_size(), out_channels, mm_conv); + ParallelConfig output_parallel_config = determine_output_parallel_config( + parallel_config, device->compute_with_storage_grid_size(), out_channels, mm_conv); bool is_non_tile_mul_width = check_non_tile_mul_width(device, conv_config, in_channels); std::optional bias_tensor = std::nullopt; @@ -890,7 +899,8 @@ ttnn::Tensor prepare_conv_bias( Conv2dConfig conv_config = conv_config_.value_or(Conv2dConfig()); const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, stride, padding, dilation, groups, conv_config); - const uint32_t output_height = ((input_height - kernel_size[0] - ((kernel_size[0] - 1 ) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; + const uint32_t output_height = + ((input_height - kernel_size[0] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[0]) / stride[0]) + 1; const uint32_t output_width = ((input_width - kernel_size[1] - ((kernel_size[0] - 1) * (dilation[0] - 1)) + 2 * padding[1]) / stride[1]) + 1; @@ -934,12 +944,12 @@ ttnn::Tensor prepare_conv_bias( shard_orientation, !use_non_tile_height); - ParallelConfig output_parallel_config = - determine_output_parallel_config(parallel_config, device->compute_with_storage_grid_size(), out_channels, mm_conv); + ParallelConfig output_parallel_config = determine_output_parallel_config( + parallel_config, device->compute_with_storage_grid_size(), out_channels, mm_conv); bool is_non_tile_mul_width = check_non_tile_mul_width(device, conv_config, in_channels); ttnn::Tensor bias_tensor_ = bias_tensor; - TT_FATAL(bias_tensor_.shape()[3]==out_channels, "Bias must have the same length as output channels"); + TT_FATAL(bias_tensor_.shape()[3] == out_channels, "Bias must have the same length as output channels"); bias_tensor_ = conv_bias_layout_convert( bias_tensor_, diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp index a7c724a45d8..e42655bb0e1 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp @@ -113,7 +113,7 @@ std::pair> prepare_conv_weights_biases uint32_t weight_block_w_ntiles, const sliding_window::ParallelConfig& input_parallel_config, const sliding_window::ParallelConfig& output_parallel_config, - T * device, + T* device, uint32_t groups, uint32_t act_block_h_ntiles, uint32_t input_width, diff --git a/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp index eb1a151403c..c5ed25af6e4 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv_transpose2d/conv_transpose2d.cpp @@ -167,7 +167,13 @@ Result conv_transpose2d( log_debug(LogOp, "Padding : ({},{}) ({},{})", input_pad_top, input_pad_bottom, input_pad_left, input_pad_right); - const bool mm_conv = use_matmul_for_1x1_conv(kernel_size, {1, 1}, {input_pad_top + input_pad_bottom, input_pad_left + input_pad_right}, dilation, groups, conv_config); + const bool mm_conv = use_matmul_for_1x1_conv( + kernel_size, + {1, 1}, + {input_pad_top + input_pad_bottom, input_pad_left + input_pad_right}, + dilation, + groups, + conv_config); const auto compute_grid_size = device->compute_with_storage_grid_size();