diff --git a/tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py b/tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py
index dfb7d5e04b9..8051e440041 100644
--- a/tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py
+++ b/tests/sweep_framework/sweeps/conv2d/short/conv2d_short_sweep.py
@@ -1620,7 +1620,6 @@ def test_conv2d_localrun(device, input_spec):
         [1, 960, 960, 24, 24, 5, 5, 1, 1, 2, 2, 960, False, 1], # 5
         [1, 816, 816, 19, 19, 5, 5, 1, 1, 2, 2, 816, False, 1], # 19
         [1, 816, 816, 23, 23, 5, 5, 2, 2, 0, 0, 816, False, 1], # 20
-        [1, 1056, 1056, 96, 96, 3, 3, 2, 2, 1, 1, 4, False, 1], # 127
         [1, 528, 528, 192, 192, 3, 3, 2, 2, 1, 1, 2, False, 1], # 220
         [1, 2904, 2904, 48, 48, 3, 3, 2, 2, 1, 1, 11, False, 1], # 294
         [1, 3024, 1232, 14, 14, 1, 1, 2, 2, 0, 0, 1, False, 1], # 1421
@@ -1630,8 +1629,6 @@ def test_conv2d_localrun(device, input_spec):
         [1, 768, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, False, 1], # 1460
         [1, 768, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, True, 1], # 1461
         [1, 768, 3, 384, 512, 32, 32, 32, 32, 0, 0, 1, True, 1], # 1464
-        [1, 64, 3, 720, 1280, 7, 7, 2, 2, 3, 3, 1, False, 1], # 1471
-        [1, 64, 3, 800, 1088, 7, 7, 2, 2, 3, 3, 1, False, 1], # 1472
         [1, 1, 64, 480, 640, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1495
         [1, 64, 64, 480, 640, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1496
         [1, 640, 1920, 32, 32, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1522
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp
index 69ce604a671..52c1eb3a352 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp
@@ -24,8 +24,9 @@ struct Conv2dConfig {
     DataType weights_dtype = DataType::BFLOAT16;
     string activation = "";
     uint32_t input_channels_alignment = 32;
-    bool deallocate_activation = false;
-    bool reallocate_halo_output = false;
+    bool deallocate_activation = false; // If true, the input tensor is deallocated once consumed, provided it is on device.
+    bool reallocate_halo_output = true; // If true, the output tensor of the halo device op is reallocated after the op completes,
+                                        // in case deallocate_activation is set to true.
     uint32_t act_block_h_override = 0; // This argument is ignored when shard_layout == WIDTH_SHARDED.
     uint32_t act_block_w_div = 1; // Amount by which the maximum possible act_block_width is divided. Max act_block_w = in_channels / (total_num_cores * TILE_WIDTH);
                                   // Ignored when shard_layout == HEIGHT_SHARDED or BLOCK_SHARDED
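
Since the second hunk flips the default of reallocate_halo_output to true, here is a minimal standalone sketch of how the two documented flags are meant to interact. Conv2dConfigSketch is a hypothetical stand-in for the Conv2dConfig struct above; only the two field names, their defaults, and the comment semantics are taken from this diff, everything else is illustrative.

#include <cassert>

// Hypothetical stand-in for ttnn's Conv2dConfig; only the two fields below
// and their defaults mirror the diff, the rest is illustrative.
struct Conv2dConfigSketch {
    bool deallocate_activation = false; // free the input tensor once consumed, if it is on device
    bool reallocate_halo_output = true; // reallocate the halo op's output after it completes,
                                        // in case deallocate_activation is set to true
};

int main() {
    Conv2dConfigSketch config;
    // New default: halo-output reallocation happens without the caller opting in.
    assert(config.reallocate_halo_output);

    // A memory-constrained caller would typically pair the two flags, so the
    // space freed by deallocating the input can be reclaimed when the halo
    // output is moved to a fresh allocation:
    config.deallocate_activation = true;
    return 0;
}

The apparent design point, going by the added comments, is that reallocating the halo output right after the input activation is freed lets the allocator compact what would otherwise remain a fragmented region of device memory.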