From 53d15d1b35fdd647cd6525c71001d153afc3031f Mon Sep 17 00:00:00 2001 From: mouliraj-mcw Date: Wed, 11 Dec 2024 11:45:17 +0000 Subject: [PATCH] #15642: Update shapes --- .../binary/device/binary_composite_op.cpp | 6 ++-- ...cast_height_multi_core_program_factory.cpp | 8 ++--- ...core_sharded_optimized_program_factory.cpp | 8 ++--- ...ght_multi_core_sharded_program_factory.cpp | 10 +++--- ...dcast_width_multi_core_program_factory.cpp | 8 ++--- ...lement_wise_multi_core_program_factory.cpp | 4 +-- .../binary_backward/binary_backward.cpp | 24 ++++++------- .../operations/eltwise/complex/complex.cpp | 2 +- .../unary/device/unary_composite_op.cpp | 6 ++-- .../eltwise/unary_backward/unary_backward.cpp | 36 +++++++++---------- 10 files changed, 56 insertions(+), 56 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp index 196314d10b29..137c3872ee06 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp @@ -437,9 +437,9 @@ Tensor _scatter(const Tensor& input_a, const Tensor& input_b, const std::optiona * by running reshape. */ Tensor _outer(const Tensor& input_a, const Tensor& input_b, const std::optional& output_mem_config) { - const tt::tt_metal::LegacyShape s_a = input_a.get_legacy_shape(); - const tt::tt_metal::LegacyShape s_b = input_b.get_legacy_shape(); - auto num_ones = [](const tt::tt_metal::LegacyShape& s) -> uint32_t { + const ttnn::SimpleShape s_a = input_a.get_padded_shape(); + const ttnn::SimpleShape s_b = input_b.get_padded_shape(); + auto num_ones = [](const ttnn::SimpleShape& s) -> uint32_t { uint32_t num1s = 0; for (uint32_t idx = 0; idx < 4; idx++) { num1s += (uint32_t)(s[idx] == 1); diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp index 6e3d972d2109..115e4c6c9e22 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp @@ -41,8 +41,8 @@ BinaryDeviceOperation ::BroadcastHeightMultiCore::create( auto& output = tensor_return_value; auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type); - const auto ashape = a.get_legacy_shape(); - const auto bshape = b->get_legacy_shape(); + const auto ashape = a.get_padded_shape(); + const auto bshape = b->get_padded_shape(); uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1; uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1; uint32_t H = ashape[-2]; @@ -238,8 +238,8 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCore::override_runtime_argument auto dst_dram_buffer = output_tensor.buffer(); - const auto ashape = input_tensor_a.get_legacy_shape(); - const auto bshape = input_tensor_b->get_legacy_shape(); + const auto ashape = input_tensor_a.get_padded_shape(); + const auto bshape = input_tensor_b->get_padded_shape(); uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1; uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1; uint32_t H = ashape[-2]; diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_optimized_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_optimized_program_factory.cpp index 5403ae82ed2c..167a2dd5682a 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_optimized_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_optimized_program_factory.cpp @@ -41,8 +41,8 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreShardedOptimized::create( auto& output = tensor_return_value; auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type); - const auto ashape = a.get_legacy_shape(); - const auto bshape = b->get_legacy_shape(); + const auto ashape = a.get_padded_shape(); + const auto bshape = b->get_padded_shape(); uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1; uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1; uint32_t H = ashape[-2]; @@ -267,9 +267,9 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCoreShardedOptimized::override_ auto all_cores = shard_spec.grid; uint32_t ncores = shard_spec.num_cores(); uint32_t Wt = 0, Ht = 0; - const auto ashape = input_tensor_a.get_legacy_shape(); + const auto ashape = input_tensor_a.get_padded_shape(); uint32_t N = ashape[0], C = ashape[1], H = ashape[2], W = ashape[3]; - uint32_t bN = input_tensor_b->get_legacy_shape()[0]; + uint32_t bN = input_tensor_b->get_padded_shape()[0]; uint32_t NC = N * C; if (a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED) { Wt = shard_spec.shape[1] / TILE_WIDTH; diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_program_factory.cpp index f15671250583..54661ca021f1 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_program_factory.cpp @@ -36,8 +36,8 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreSharded::create( auto& output = tensor_return_value; auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type); - const auto ashape = a.get_legacy_shape(); - const auto bshape = b->get_legacy_shape(); + const auto ashape = a.get_padded_shape(); + const auto bshape = b->get_padded_shape(); uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1; uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1; uint32_t H = ashape[-2]; @@ -127,7 +127,7 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreSharded::create( .set_globally_allocated_address(*output.buffer()); auto out_cb = tt_metal::CreateCircularBuffer(program, all_cores, output_cb_config); - uint32_t num_input_tiles = (b->get_legacy_shape()[-1] * output.element_size() + TILE_HW - 1) / TILE_HW; + uint32_t num_input_tiles = (b->get_padded_shape()[-1] * output.element_size() + TILE_HW - 1) / TILE_HW; uint32_t src1_cb_index = tt::CBIndex::c_1; tt_metal::CircularBufferConfig src1_cb_config = tt_metal::CircularBufferConfig(num_input_tiles * input1_tile_size, {{src1_cb_index, b_df}}) @@ -249,9 +249,9 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCoreSharded::override_runtime_a auto all_cores = shard_spec.grid; uint32_t ncores = shard_spec.num_cores(); uint32_t Wt = 0, Ht = 0; - const auto ashape = input_tensor_a.get_legacy_shape(); + const auto ashape = input_tensor_a.get_padded_shape(); uint32_t N = ashape[0], C = ashape[1], H = ashape[2], W = ashape[3]; - uint32_t bN = input_tensor_b->get_legacy_shape()[0]; + uint32_t bN = input_tensor_b->get_padded_shape()[0]; uint32_t NC = N * C; if (a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED) { Wt = shard_spec.shape[1] / TILE_WIDTH; diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp index 3ab77f8fab6f..9aabfe7ed445 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp @@ -40,8 +40,8 @@ BinaryDeviceOperation::BroadcastWidthMultiCore::cached_program_t BinaryDeviceOpe auto& output = tensor_return_value; auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type); - const auto ashape = a.get_legacy_shape(); - const auto bshape = b->get_legacy_shape(); + const auto ashape = a.get_padded_shape(); + const auto bshape = b->get_padded_shape(); uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1; uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1; uint32_t H = ashape[-2]; @@ -240,8 +240,8 @@ void BinaryDeviceOperation::BroadcastWidthMultiCore::override_runtime_arguments( auto dst_dram_buffer = output_tensor.buffer(); - const auto ashape = input_tensor_a.get_legacy_shape(); - const auto bshape = input_tensor_b->get_legacy_shape(); + const auto ashape = input_tensor_a.get_padded_shape(); + const auto bshape = input_tensor_b->get_padded_shape(); uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1; uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1; uint32_t H = ashape[-2]; diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp index ceef285e7172..2cec32083e39 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp @@ -97,8 +97,8 @@ inline __attribute__((always_inline)) void set_eltwise_binary_runtime_args( if (block_or_width_sharded) { block_size = block_width * block_height; end_core = (*shard_spec.value().grid.ranges().begin()).end_coord; - output_width = output.get_legacy_shape()[-1] / TILE_WIDTH; - uint32_t output_height = output.volume() / output.get_legacy_shape()[-1] / TILE_HEIGHT; + output_width = output.get_padded_shape()[-1] / TILE_WIDTH; + uint32_t output_height = output.volume() / output.get_padded_shape()[-1] / TILE_HEIGHT; last_unpadded_block_height = block_height - (round_up(output_height, block_height) - output_height); last_unpadded_block_width = block_width - (round_up(output_width, block_width) - output_width); } diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary_backward/binary_backward.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary_backward/binary_backward.cpp index 0d647f99577a..c5421ddfe289 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary_backward/binary_backward.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary_backward/binary_backward.cpp @@ -632,10 +632,10 @@ std::vector> ExecuteBackwardConcat::invoke( if (are_required_outputs[0]) { ttnn::SmallVector start_index = {0, 0, 0, 0}; ttnn::SmallVector end_index = { - input.get_legacy_shape()[0], - input.get_legacy_shape()[1], - input.get_legacy_shape()[2], - input.get_legacy_shape()[3]}; + input.get_padded_shape()[0], + input.get_padded_shape()[1], + input.get_padded_shape()[2], + input.get_padded_shape()[3]}; ttnn::SmallVector step = {1, 1, 1, 1}; ttnn::slice(queue_id, grad, start_index, end_index, step, std::nullopt, input_grad); grad_tensor[0] = input_grad; @@ -644,19 +644,19 @@ std::vector> ExecuteBackwardConcat::invoke( if (are_required_outputs[1]) { ttnn::SmallVector start_index_2 = {0, 0, 0, 0}; if (dim == 0) { - start_index_2 = {input.get_legacy_shape()[0], 0, 0, 0}; + start_index_2 = {input.get_padded_shape()[0], 0, 0, 0}; } else if (dim == 1) { - start_index_2 = {0, input.get_legacy_shape()[1], 0, 0}; + start_index_2 = {0, input.get_padded_shape()[1], 0, 0}; } else if (dim == 2) { - start_index_2 = {0, 0, input.get_legacy_shape()[2], 0}; + start_index_2 = {0, 0, input.get_padded_shape()[2], 0}; } else if (dim == 3) { - start_index_2 = {0, 0, 0, input.get_legacy_shape()[3]}; + start_index_2 = {0, 0, 0, input.get_padded_shape()[3]}; } ttnn::SmallVector end_index_2 = { - grad.get_legacy_shape()[0], - grad.get_legacy_shape()[1], - grad.get_legacy_shape()[2], - grad.get_legacy_shape()[3]}; + grad.get_padded_shape()[0], + grad.get_padded_shape()[1], + grad.get_padded_shape()[2], + grad.get_padded_shape()[3]}; ttnn::SmallVector step_2 = {1, 1, 1, 1}; ttnn::slice(queue_id, grad, start_index_2, end_index_2, step_2, std::nullopt, other_grad); grad_tensor[1] = other_grad; diff --git a/ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp b/ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp index a8cde4ccf237..07fc6230f530 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp @@ -19,7 +19,7 @@ void ComplexTensor::deallocate() { } ComplexTensor CreateComplexTensor::invoke(const Tensor& real, const Tensor& imag) { - TT_ASSERT(real.get_legacy_shape() == imag.get_legacy_shape(), "Tensor shapes of real and imag should be identical"); + TT_ASSERT(real.get_padded_shape() == imag.get_padded_shape(), "Tensor shapes of real and imag should be identical"); return ComplexTensor({real, imag}); } diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp index a170e3d98558..74818d861fae 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp @@ -507,7 +507,7 @@ Tensor _variance_impl( const std::optional& output_mem_config) { ttnn::SmallVector dims = {2, 3}; constexpr float correction = 0.0f; - auto shape_wh = y.get_legacy_shape(); + auto shape_wh = y.get_padded_shape(); float scale = 1.0f / ((float)(shape_wh[3] * shape_wh[2]) - correction); Tensor sqr_y_minus_mean_y = ttnn::square(y_minus_mean_y, output_mem_config); return ttnn::sum(sqr_y_minus_mean_y, dims, true, std::nullopt, std::nullopt, scale); @@ -705,10 +705,10 @@ Tensor ExecuteUnaryCompositeThreshold::invoke( std::vector split_tensor_for_glu( const Tensor& input_a, int32_t dim, const std::optional& output_mem_config) { std::vector t_split; - tt::tt_metal::LegacyShape inshape(input_a.get_legacy_shape()); + ttnn::SimpleShape inshape(input_a.get_padded_shape()); TT_FATAL(((inshape[dim] / 2) % tt::constants::TILE_WIDTH == 0), "Split tensor dimension should be in full tile"); ttnn::SmallVector s_a = {0, 0, 0, 0}; - ttnn::SmallVector e_a = {input_a.get_legacy_shape()[0], inshape[1], inshape[2], inshape[3] / 2}; + ttnn::SmallVector e_a = {input_a.get_padded_shape()[0], inshape[1], inshape[2], inshape[3] / 2}; ttnn::SmallVector s_b = {0, 0, 0, inshape[3] / 2}; ttnn::SmallVector e_b = {inshape[0], inshape[1], inshape[2], inshape[3]}; diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp index 21c5853eca81..b4d518bdb10c 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp @@ -1780,10 +1780,10 @@ std::vector ExecuteUnaryBackwardRepeat::invoke( auto output_memory_config = output_mem_config.value_or( input.memory_config()); // TODO: Remove after ternary forward ops migration is completed - auto shape_wh = input.get_legacy_shape(); + auto shape_wh = input.get_padded_shape(); TT_FATAL(shape_wh[0] == 1 && "input shape[0] should be 1", "Error"); auto ttnn_device = input.device(); - // input.get_legacy_shape()[0] + // input.get_padded_shape()[0] // If repeat shape has 0's, it returns zeros of given input if (shape[0] == 0 || shape[1] == 0 || shape[2] == 0 || shape[3] == 0) { Tensor zero_tensor = @@ -1880,7 +1880,7 @@ std::vector ExecuteUnaryBackwardProd::invoke( Tensor required = ttnn::permute(grad, after_permute_dims, output_memory_config); ttnn::SmallVector start_index = {0, 0, 0, 0}; ttnn::SmallVector end_index = { - grad.get_legacy_shape()[0], 1, grad.get_legacy_shape()[1], grad.get_legacy_shape()[2]}; + grad.get_padded_shape()[0], 1, grad.get_padded_shape()[1], grad.get_padded_shape()[2]}; Tensor new_slice_tensor = ttnn::slice(DefaultQueueId, required, start_index, end_index, step, std::nullopt); after_permute_dims = {0, 2, 3, 1}; updated_grad = ttnn::permute(new_slice_tensor, after_permute_dims, output_memory_config); @@ -1895,7 +1895,7 @@ std::vector ExecuteUnaryBackwardProd::invoke( Tensor required = ttnn::permute(grad, after_permute_dims, output_memory_config); ttnn::SmallVector start_index = {0, 0, 0, 0}; ttnn::SmallVector end_index = { - grad.get_legacy_shape()[0], 1, grad.get_legacy_shape()[1], grad.get_legacy_shape()[3]}; + grad.get_padded_shape()[0], 1, grad.get_padded_shape()[1], grad.get_padded_shape()[3]}; Tensor new_slice_tensor = ttnn::slice(DefaultQueueId, required, start_index, end_index, step, std::nullopt); updated_grad = ttnn::permute(new_slice_tensor, after_permute_dims, output_memory_config); if (updated_grad.get_layout() == Layout::ROW_MAJOR) { @@ -1925,9 +1925,9 @@ std::vector ExecuteUnaryBackwardProd::invoke( return grad_tensor; } else if (dim == 1 || dim == -3) { Tensor tensor_1_temp = reciprocal_input; - if (reciprocal_input.get_legacy_shape()[1] % 32 != 0) { + if (reciprocal_input.get_padded_shape()[1] % 32 != 0) { ttnn::SmallVector> padding = { - {0, 0}, {0, 32 - (reciprocal_input.get_legacy_shape()[1] % 32)}, {0, 0}, {0, 0}}; + {0, 0}, {0, 32 - (reciprocal_input.get_padded_shape()[1] % 32)}, {0, 0}, {0, 0}}; tensor_1_temp = ttnn::pad(0, reciprocal_input, padding, 0, true, std::nullopt); } ttnn::SmallVector after_permute_dims = {0, 2, 3, 1}; @@ -1945,13 +1945,13 @@ std::vector ExecuteUnaryBackwardProd::invoke( after_permute_dims, output_memory_config); Tensor grad_result = result; - if (reciprocal_input.get_legacy_shape()[1] % 32 != 0) { + if (reciprocal_input.get_padded_shape()[1] % 32 != 0) { ttnn::SmallVector start_index = {0, 0, 0, 0}; ttnn::SmallVector end_index = { - input.get_legacy_shape()[0], - input.get_legacy_shape()[1], - input.get_legacy_shape()[2], - input.get_legacy_shape()[3]}; + input.get_padded_shape()[0], + input.get_padded_shape()[1], + input.get_padded_shape()[2], + input.get_padded_shape()[3]}; auto step = ttnn::SmallVector({1, 1, 1, 1}); grad_result = ttnn::slice(DefaultQueueId, result, start_index, end_index, step, std::nullopt); } @@ -1960,9 +1960,9 @@ std::vector ExecuteUnaryBackwardProd::invoke( } // dim 0 Tensor tensor_1_temp = reciprocal_input; - if (reciprocal_input.get_legacy_shape()[0] % 32 != 0) { + if (reciprocal_input.get_padded_shape()[0] % 32 != 0) { ttnn::SmallVector> padding = { - {0, (32 - (reciprocal_input.get_legacy_shape()[0] % 32))}, {0, 0}, {0, 0}, {0, 0}}; + {0, (32 - (reciprocal_input.get_padded_shape()[0] % 32))}, {0, 0}, {0, 0}, {0, 0}}; tensor_1_temp = ttnn::pad(0, reciprocal_input, padding, 0, false, std::nullopt); } ttnn::SmallVector after_permute_dims = {3, 1, 2, 0}; @@ -1979,13 +1979,13 @@ std::vector ExecuteUnaryBackwardProd::invoke( after_permute_dims, output_memory_config); Tensor grad_result = result; - if (reciprocal_input.get_legacy_shape()[0] % 32 != 0) { + if (reciprocal_input.get_padded_shape()[0] % 32 != 0) { ttnn::SmallVector start_index = {0, 0, 0, 0}; ttnn::SmallVector end_index = { - input.get_legacy_shape()[0], - input.get_legacy_shape()[1], - input.get_legacy_shape()[2], - input.get_legacy_shape()[3]}; + input.get_padded_shape()[0], + input.get_padded_shape()[1], + input.get_padded_shape()[2], + input.get_padded_shape()[3]}; grad_result = ttnn::slice(DefaultQueueId, result, start_index, end_index, step, std::nullopt); } grad_tensor.emplace_back(grad_result);