Skip to content

Commit

Permalink
#0: fix if sharding width is not multiple of 16.
Browse files Browse the repository at this point in the history
  • Loading branch information
shwetankTT committed Apr 24, 2024
1 parent a44e9c9 commit 5a81404
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
2 changes: 2 additions & 0 deletions tests/ttnn/unit_tests/operations/test_silu_row_major.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def test_silu_multi_core(device, input_shape, shard_strategy):
shard_height = math.ceil(batch_size * height * width / ncores)
shard_width = num_channels

if (shard_height * shard_width) % 1024 != 0:
pytest.skip("Shard sizes that are not multiples of 1024 are not supported.")
shard_shape = (shard_height, shard_width)
logger.debug(f"shard_shape={shard_shape}")
shard_spec = ttnn.experimental.tensor.ShardSpec(shard_grid, shard_shape, shard_orientation, False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@ operation::ProgramWithCallbacks eltwise_unary_sharded(const Tensor &input, Tenso
uint32_t output_tile_size = tt::tt_metal::detail::TileSize(out_df);

TT_FATAL(input_tile_size == output_tile_size, "Input and output tile size should be same");
uint32_t shard_size_in_bytes = shard_spec.numel() * datum_size(act_df);
size_t shard_height = shard_spec.shape[0];

uint32_t num_tile_per_core = (shard_size_in_bytes + input_tile_size - 1) / input_tile_size; //ceil value
TT_FATAL(input_tile_size <= shard_size_in_bytes, "Input tile size should be less than shard size");
TT_FATAL((shard_spec.shape[1] * datum_size(act_df)) % L1_ALIGNMENT == 0, "Shard width should be multiple of L1_ADRESS_ALIGNMENT");
size_t shard_width = round_up_to_mul16(shard_spec.shape[1]); // rounding up is done to aligned with --> tt-metal/tt_metal/detail/util.hpp:31
size_t shard_size_in_bytes = shard_height * shard_width * datum_size(act_df);

uint32_t num_tile_per_core = (shard_size_in_bytes + input_tile_size - 1) / input_tile_size; //ceil value
TT_FATAL(shard_size_in_bytes % input_tile_size == 0, "Shard Size must be multiple of input_tile_size");

uint32_t in_cb_id = CB::c_in0;
uint32_t buffering_factor = 1; // data is already fully buffered in the CBs since its sharded
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/common/test_tiles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ inline std::vector<T> untilize_nchw(const BufferType<T>& in, const std::vector<s
return result;
}

// Rounds `val` up to the nearest multiple of 16. A value that is already a
// multiple of 16 is returned unchanged. The arithmetic is unsigned 32-bit, so
// an input above 0xFFFFFFF0 wraps around and yields 0.
inline std::uint32_t round_up_to_mul16(std::uint32_t val) {
    constexpr std::uint32_t low_bits = 15u;  // mask of the bits below the 16 boundary
    // Adding the mask carries into the next multiple unless the low bits were
    // already zero; clearing the low bits then lands exactly on the boundary.
    return (val + low_bits) & ~low_bits;
}

// Rounds `val` up to the nearest multiple of 32. A value that is already a
// multiple of 32 is returned unchanged. The arithmetic is unsigned 32-bit, so
// an input above 0xFFFFFFE0 wraps around and yields 0.
inline std::uint32_t round_up_to_mul32(std::uint32_t val) {
    constexpr std::uint32_t low_bits = 31u;  // mask of the bits below the 32 boundary
    // Adding the mask carries into the next multiple unless the low bits were
    // already zero; clearing the low bits then lands exactly on the boundary.
    return (val + low_bits) & ~low_bits;
}

// Converts a linear non-zero-padded row-major tensor to zero-padded-32 32-swizzled tilized row-major tensor
Expand Down

0 comments on commit 5a81404

Please sign in to comment.