Conv2dConfig reallocate_halo_output default to true
The idea behind this flag is to enable reallocation of the
halo output buffer in place of the input buffer, in case the
deallocate_activation flag is also set.
The halo output is then moved up in memory and
fragmentation is reduced.

If deallocate_activation is not set, reallocate_halo_output
has no effect, as ttnn::move will be a no-op
since the input buffer is not deallocated.

The current problem is that setting deallocate_activation
alone does not resolve a user's memory issue unless they
set reallocate_halo_output as well.

This change addresses that by setting reallocate_halo_output
to true by default.

This helps by increasing the pass rate in ttnn torch
traces.
Pavle Josipovic committed Jan 7, 2025
1 parent 932ee21 commit 51652b4
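
As a hedged illustration of what this commit means for callers — a minimal sketch, assuming the Python bindings expose the Conv2dConfig fields as keyword arguments (the two flag names and weights_dtype come from the diff below; dtype and the rest are illustrative assumptions):

```python
import ttnn

# Before this commit, freeing the input only reduced fragmentation if the
# caller also opted into reallocating the halo output explicitly:
conv_config = ttnn.Conv2dConfig(
    dtype=ttnn.bfloat16,
    weights_dtype=ttnn.bfloat16,
    deallocate_activation=True,
    reallocate_halo_output=True,  # previously defaulted to False
)

# After this commit, reallocate_halo_output defaults to True, so setting
# deallocate_activation alone is enough to get the compaction behavior:
conv_config = ttnn.Conv2dConfig(
    dtype=ttnn.bfloat16,
    weights_dtype=ttnn.bfloat16,
    deallocate_activation=True,
)
```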
Showing 2 changed files with 8 additions and 13 deletions.
@@ -1620,20 +1620,15 @@ def test_conv2d_localrun(device, input_spec):
[1, 960, 960, 24, 24, 5, 5, 1, 1, 2, 2, 960, False, 1], # 5
[1, 816, 816, 19, 19, 5, 5, 1, 1, 2, 2, 816, False, 1], # 19
[1, 816, 816, 23, 23, 5, 5, 2, 2, 0, 0, 816, False, 1], # 20
[1, 1056, 1056, 96, 96, 3, 3, 2, 2, 1, 1, 4, False, 1], # 127
[1, 528, 528, 192, 192, 3, 3, 2, 2, 1, 1, 2, False, 1], # 220
[1, 2904, 2904, 48, 48, 3, 3, 2, 2, 1, 1, 11, False, 1], # 294
[1, 819, 256, 100, 136, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1443
[1, 819, 256, 50, 68, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1447
[1, 1024, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, True, 1], # 1458
[1, 768, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, False, 1], # 1460
[1, 768, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, True, 1], # 1461
[1, 768, 3, 384, 512, 32, 32, 32, 32, 0, 0, 1, True, 1], # 1464
[1, 64, 3, 720, 1280, 7, 7, 2, 2, 3, 3, 1, False, 1], # 1471
[1, 64, 3, 800, 1088, 7, 7, 2, 2, 3, 3, 1, False, 1], # 1472
[1, 1, 64, 480, 640, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1495
[1, 64, 64, 480, 640, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1496
[1, 640, 1920, 32, 32, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1522
[1, 320, 320, 64, 64, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1530
[1, 320, 640, 64, 64, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1540
[1, 320, 960, 64, 64, 3, 3, 1, 1, 1, 1, 1, True, 1], # 1545
16 changes: 8 additions & 8 deletions ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp
@@ -24,14 +24,14 @@ struct Conv2dConfig {
     DataType weights_dtype = DataType::BFLOAT16;
     string activation = "";
     uint32_t input_channels_alignment = 32;
-    bool deallocate_activation = false;
-    bool reallocate_halo_output = false;
-    uint32_t act_block_h_override = 0;  // This argument is ignored when shard_layout == WIDTH_SHARDED.
-    uint32_t act_block_w_div =
-        1;  // Amount by which the maximum possible act_block_width is divided. Max act_block_w = in_channels /
-            // (total_num_cores * TILE_WIDTH); Ignored when shard_layout == HEIGHT_SHARDED or BLOCK_SHARDED
-    bool reshard_if_not_optimal = false;    // if true, override_sharding_config should not be set to true
-    bool override_sharding_config = false;  // if true, reshard_if_not_optimal should not be set to true
+    bool deallocate_activation = false;  // If true, the input (activation) tensor is deallocated if it is on device.
+    bool reallocate_halo_output = true;  // If true, the halo output tensor is reallocated after the halo device op
+                                         // completes, in case deallocate_activation is set to true.
+    uint32_t act_block_h_override = 0;  // This argument is ignored when shard_layout == WIDTH_SHARDED.
+    uint32_t act_block_w_div = 1;  // Amount by which the maximum possible act_block_width is divided. Max act_block_w = in_channels / (total_num_cores * TILE_WIDTH);
+                                   // Ignored when shard_layout == HEIGHT_SHARDED or BLOCK_SHARDED
+    bool reshard_if_not_optimal = false;    // if true, override_sharding_config should not be set to true
+    bool override_sharding_config = false;  // if true, reshard_if_not_optimal should not be set to true
     std::optional<TensorMemoryLayout> shard_layout = std::nullopt;
     std::optional<CoreRangeSet> core_grid = std::nullopt;  // used only if override_sharding_config is true
     bool transpose_shards = true;  // used only if override_sharding_config is true and if height sharding is false
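
For completeness, a usage sketch passing such a config into a conv2d call. This is not from the commit; the ttnn.conv2d parameter names and the flattened activation layout follow common ttnn examples from around this time and may differ between versions:

```python
import torch
import ttnn

device = ttnn.open_device(device_id=0)

# Hypothetical shapes: ttnn.conv2d conventionally takes the activation
# flattened to (1, 1, N*H*W, C).
torch_input = torch.randn(1, 1, 32 * 32, 64, dtype=torch.bfloat16)
torch_weight = torch.randn(64, 64, 3, 3, dtype=torch.bfloat16)

tt_input = ttnn.from_torch(torch_input, dtype=ttnn.bfloat16, device=device)
tt_weight = ttnn.from_torch(torch_weight, dtype=ttnn.bfloat16)

conv_config = ttnn.Conv2dConfig(
    dtype=ttnn.bfloat16,
    weights_dtype=ttnn.bfloat16,
    # With the new default, this single flag now also triggers the
    # ttnn::move of the halo output into the freed input region.
    deallocate_activation=True,
)

output = ttnn.conv2d(
    input_tensor=tt_input,
    weight_tensor=tt_weight,
    device=device,
    in_channels=64,
    out_channels=64,
    batch_size=1,
    input_height=32,
    input_width=32,
    kernel_size=(3, 3),
    stride=(1, 1),
    padding=(1, 1),
    conv_config=conv_config,
)

ttnn.close_device(device)
```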
