13541 set act_block_w_div to max value
Choose the max valid value for act_block_w_div in the
auto-sharding code path for conv2d in order to
minimize L1 usage for WIDTH_SHARDED convs.

This improves the pass rate on ttnn torch traces.
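
To make the choice concrete, here is a minimal Python sketch of the divisor the auto-shard path now picks (div_up and TILE_WIDTH stand in for tt::div_up and constants::TILE_WIDTH; the 11-core grid is an assumed example, not taken from this commit):

    TILE_WIDTH = 32

    def div_up(a: int, b: int) -> int:
        # Integer ceiling division, mirroring tt::div_up.
        return -(-a // b)

    def max_act_block_w_div(in_channels: int, num_cores: int) -> int:
        # Largest valid divisor: leaves one 32-wide tile per core.
        return div_up(in_channels, num_cores * TILE_WIDTH)

    # Example: 2904 input channels (one of the conv shapes in the test
    # diff below), width-sharded over an assumed 11 cores.
    print(max_act_block_w_div(2904, 11))  # 9, instead of the old default of 1

A larger act_block_w_div shrinks act_block_w, and with it the per-core activation buffer in L1, by roughly the same factor.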
Pavle Josipovic authored and pavlejosipovic committed Dec 4, 2024
1 parent 77e056e commit 68bc110
Showing 4 changed files with 24 additions and 6 deletions.
@@ -456,7 +456,6 @@ def test_conv2d_localrun(device, input_spec):
 # Input is 32MB, maps to MM 64 cores; we need to avoid sharding this tensor and use dram interleaved directly with MM
 [1, 256, 1024, 128, 128, 1, 1, 1, 1, 0, 0, 1, False, 1], # 5
 [1, 1056, 1056, 96, 96, 3, 3, 2, 2, 1, 1, 4, False, 1], # 14
-[1, 2904, 2904, 24, 24, 3, 3, 1, 1, 1, 1, 11, False, 1], # 169
 [1, 2904, 2904, 48, 48, 3, 3, 2, 2, 1, 1, 11, False, 1], # 170
 [1, 1024, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, True, 1], # 172
 [1, 768, 3, 224, 224, 32, 32, 32, 32, 0, 0, 1, False, 1], # 181
@@ -465,7 +464,6 @@ def test_conv2d_localrun(device, input_spec):
 [1, 64, 3, 720, 1280, 7, 7, 2, 2, 3, 3, 1, False, 1], # 203
 [1, 64, 3, 800, 1088, 7, 7, 2, 2, 3, 3, 1, False, 1], # 204
 [1, 528, 528, 192, 192, 3, 3, 2, 2, 1, 1, 2, False, 1], # 292
-[1, 7392, 7392, 24, 24, 3, 3, 2, 2, 1, 1, 28, False, 1], # 366
 [1, 816, 816, 19, 19, 5, 5, 1, 1, 2, 2, 816, False, 1], # 373
 [1, 816, 816, 23, 23, 5, 5, 2, 2, 0, 0, 816, False, 1], # 374
 [1, 960, 960, 24, 24, 5, 5, 1, 1, 2, 2, 960, False, 1], # 394
4 changes: 2 additions & 2 deletions tests/ttnn/unit_tests/operations/test_new_conv2d.py
@@ -153,7 +153,7 @@ def run_conv(
     if config_override and "act_block_h" in config_override and not auto_shard:
         conv_config.act_block_h_override = config_override["act_block_h"]
 
-    if config_override and "act_block_w_div" in config_override:
+    if config_override and "act_block_w_div" in config_override and not auto_shard:
         conv_config.act_block_w_div = config_override["act_block_w_div"]
 
     if config_override and "num_cores_nhw" in config_override:
@@ -635,7 +635,7 @@ def test_conv_ws(
         enable_split_reader=False,
         enable_subblock_padding=False,
         reshard_if_not_optimal=True,
-        act_block_w_div=act_block_w_div,
+        act_block_w_div=act_block_w_div if not auto_shard else 1,
         act_block_h_override=32,
     )
     [tt_output_tensor_on_device, out_height, out_width, weights_device, bias_device] = ttnn.conv2d(
20 changes: 20 additions & 0 deletions ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
@@ -333,6 +333,7 @@ OptimizedConvBlockConfig determine_per_core_conv_block_config(
         ? round_up(padded_in_channels * window_w, 32)
         : padded_in_channels;
     if (parallel_config.shard_scheme == TensorMemoryLayout::WIDTH_SHARDED) {
+        TT_ASSERT(padded_in_channels % (32 * parallel_config.grid.num_cores() * act_block_w_div) == 0);
         act_block_w = (padded_in_channels * window_h * window_w) / (parallel_config.grid.num_cores() * act_block_w_div);
     }
     TT_ASSERT(act_block_w % 32 == 0);
@@ -877,6 +878,25 @@ static void adjust_conv_op_config_for_auto_shard(
         // be conservative with L1 memory usage.
         conv_config.act_block_h_override = constants::TILE_HEIGHT;
     }
+
+    if (conv_config.act_block_w_div == 1 && conv_config.shard_layout == TensorMemoryLayout::WIDTH_SHARDED) {
+        uint32_t width_sharded_num_cores = determine_parallel_config(
+                                               TensorMemoryLayout::WIDTH_SHARDED,
+                                               batch_size,
+                                               in_channels,
+                                               output_height,
+                                               output_width,
+                                               out_channels,
+                                               compute_grid_size,
+                                               shard_orientation,
+                                               !is_mm_conv)
+                                               .grid.num_cores();
+        // Set act_block_w_div to the max value to be conservative with L1 memory usage.
+        // act_block_w_div == 1 is currently the default value.
+        conv_config.act_block_w_div =
+            tt::div_up(in_channels, width_sharded_num_cores * constants::TILE_WIDTH);
+    }
 }
 
 template <typename T>
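Worked through with concrete numbers, the new assert and the act_block_w computation above fit together as follows (a sketch under assumed values: 2904 input channels padded to 3168 for an 11-core grid with a 3x3 window; the exact padding scheme is an assumption):

    TILE_WIDTH = 32
    in_channels, num_cores, window_h, window_w = 2904, 11, 3, 3
    act_block_w_div = 9  # div_up(2904, 11 * 32), as chosen by the auto-shard path
    padded_in_channels = 3168  # 2904 rounded up to a multiple of 11 * 32 (assumed)

    # The new TT_ASSERT: channels must split evenly into 32-wide tiles
    # across all cores and across all act_block_w_div sub-blocks.
    assert padded_in_channels % (TILE_WIDTH * num_cores * act_block_w_div) == 0

    act_block_w = (padded_in_channels * window_h * window_w) // (num_cores * act_block_w_div)
    assert act_block_w % TILE_WIDTH == 0  # mirrors TT_ASSERT(act_block_w % 32 == 0)
    print(act_block_w)  # 288, versus 2592 under the old default act_block_w_div == 1
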
4 changes: 2 additions & 2 deletions ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.hpp
@@ -41,8 +41,8 @@ struct Conv2dConfig {
     bool deallocate_activation = false;
     bool reallocate_halo_output = false;
     uint32_t act_block_h_override = 0; // This argument is ignored when shard_layout == WIDTH_SHARDED.
-    uint32_t act_block_w_div = 1; // Amount by which the maximum possible act_block_width is divided. Max act_block_w = (in_channels * window_w * window_h) / total_num_cores;
-                                  // Ignored when shard_layout == HEIGHT_SHARDED or BLOCK_SHARDED.
+    uint32_t act_block_w_div = 1; // Amount by which the maximum possible act_block_width is divided. Max act_block_w = in_channels / (total_num_cores * TILE_WIDTH);
+                                  // Ignored when shard_layout == HEIGHT_SHARDED or BLOCK_SHARDED.
     bool reshard_if_not_optimal = false; // if true, override_sharding_config should not be set to true
     bool override_sharding_config = false; // if true, reshard_if_not_optimal should not be set to true
     std::optional<TensorMemoryLayout> shard_layout;
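
The practical upshot for callers: under auto-sharding, a width-sharded conv no longer needs a hand-tuned act_block_w_div, since leaving the field at its default of 1 lets the new code path maximize it. A usage sketch based on the tests above (only fields that appear in this diff are set; any other constructor details are assumptions):

    import ttnn

    # shard_layout is left unset so auto-sharding picks the layout; if it
    # picks WIDTH_SHARDED while act_block_w_div is still 1 (the default),
    # the config is adjusted to div_up(in_channels, num_cores * TILE_WIDTH).
    conv_config = ttnn.Conv2dConfig(
        act_block_w_div=1,  # default; the auto-shard path may raise it
        reshard_if_not_optimal=True,
    )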