From 53d15d1b35fdd647cd6525c71001d153afc3031f Mon Sep 17 00:00:00 2001
From: mouliraj-mcw <mcw-melamurugan@ext.tenstorrent.com>
Date: Wed, 11 Dec 2024 11:45:17 +0000
Subject: [PATCH] #15642: Update shapes

---
 .../binary/device/binary_composite_op.cpp     |  6 ++--
 ...cast_height_multi_core_program_factory.cpp |  8 ++---
 ...core_sharded_optimized_program_factory.cpp |  8 ++---
 ...ght_multi_core_sharded_program_factory.cpp | 10 +++---
 ...dcast_width_multi_core_program_factory.cpp |  8 ++---
 ...lement_wise_multi_core_program_factory.cpp |  4 +--
 .../binary_backward/binary_backward.cpp       | 24 ++++++-------
 .../operations/eltwise/complex/complex.cpp    |  2 +-
 .../unary/device/unary_composite_op.cpp       |  6 ++--
 .../eltwise/unary_backward/unary_backward.cpp | 36 +++++++++----------
 10 files changed, 56 insertions(+), 56 deletions(-)
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp
index 196314d10b29..137c3872ee06 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp
@@ -437,9 +437,9 @@ Tensor _scatter(const Tensor& input_a, const Tensor& input_b, const std::optiona
  *   by running reshape.
  */
 Tensor _outer(const Tensor& input_a, const Tensor& input_b, const std::optional<MemoryConfig>& output_mem_config) {
-    const tt::tt_metal::LegacyShape s_a = input_a.get_legacy_shape();
-    const tt::tt_metal::LegacyShape s_b = input_b.get_legacy_shape();
-    auto num_ones = [](const tt::tt_metal::LegacyShape& s) -> uint32_t {
+    const ttnn::SimpleShape s_a = input_a.get_padded_shape();
+    const ttnn::SimpleShape s_b = input_b.get_padded_shape();
+    auto num_ones = [](const ttnn::SimpleShape& s) -> uint32_t {
         uint32_t num1s = 0;
         for (uint32_t idx = 0; idx < 4; idx++) {
             num1s += (uint32_t)(s[idx] == 1);
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp
index 6e3d972d2109..115e4c6c9e22 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_program_factory.cpp
@@ -41,8 +41,8 @@ BinaryDeviceOperation ::BroadcastHeightMultiCore::create(
     auto& output = tensor_return_value;
     auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type);
 
-    const auto ashape = a.get_legacy_shape();
-    const auto bshape = b->get_legacy_shape();
+    const auto ashape = a.get_padded_shape();
+    const auto bshape = b->get_padded_shape();
     uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
     uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
     uint32_t H = ashape[-2];
@@ -238,8 +238,8 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCore::override_runtime_argument
 
     auto dst_dram_buffer = output_tensor.buffer();
 
-    const auto ashape = input_tensor_a.get_legacy_shape();
-    const auto bshape = input_tensor_b->get_legacy_shape();
+    const auto ashape = input_tensor_a.get_padded_shape();
+    const auto bshape = input_tensor_b->get_padded_shape();
     uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
     uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
     uint32_t H = ashape[-2];
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_optimized_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_optimized_program_factory.cpp
index 5403ae82ed2c..167a2dd5682a 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_optimized_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_optimized_program_factory.cpp
@@ -41,8 +41,8 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreShardedOptimized::create(
     auto& output = tensor_return_value;
     auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type);
 
-    const auto ashape = a.get_legacy_shape();
-    const auto bshape = b->get_legacy_shape();
+    const auto ashape = a.get_padded_shape();
+    const auto bshape = b->get_padded_shape();
     uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
     uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
     uint32_t H = ashape[-2];
@@ -267,9 +267,9 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCoreShardedOptimized::override_
     auto all_cores = shard_spec.grid;
     uint32_t ncores = shard_spec.num_cores();
     uint32_t Wt = 0, Ht = 0;
-    const auto ashape = input_tensor_a.get_legacy_shape();
+    const auto ashape = input_tensor_a.get_padded_shape();
     uint32_t N = ashape[0], C = ashape[1], H = ashape[2], W = ashape[3];
-    uint32_t bN = input_tensor_b->get_legacy_shape()[0];
+    uint32_t bN = input_tensor_b->get_padded_shape()[0];
     uint32_t NC = N * C;
     if (a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED) {
         Wt = shard_spec.shape[1] / TILE_WIDTH;
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_program_factory.cpp
index f15671250583..54661ca021f1 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_multi_core_sharded_program_factory.cpp
@@ -36,8 +36,8 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreSharded::create(
     auto& output = tensor_return_value;
     auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type);
 
-    const auto ashape = a.get_legacy_shape();
-    const auto bshape = b->get_legacy_shape();
+    const auto ashape = a.get_padded_shape();
+    const auto bshape = b->get_padded_shape();
     uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
     uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
     uint32_t H = ashape[-2];
@@ -127,7 +127,7 @@ BinaryDeviceOperation::BroadcastHeightMultiCoreSharded::create(
             .set_globally_allocated_address(*output.buffer());
     auto out_cb = tt_metal::CreateCircularBuffer(program, all_cores, output_cb_config);
 
-    uint32_t num_input_tiles = (b->get_legacy_shape()[-1] * output.element_size() + TILE_HW - 1) / TILE_HW;
+    uint32_t num_input_tiles = (b->get_padded_shape()[-1] * output.element_size() + TILE_HW - 1) / TILE_HW;
     uint32_t src1_cb_index = tt::CBIndex::c_1;
     tt_metal::CircularBufferConfig src1_cb_config =
         tt_metal::CircularBufferConfig(num_input_tiles * input1_tile_size, {{src1_cb_index, b_df}})
@@ -249,9 +249,9 @@ void BinaryDeviceOperation ::BroadcastHeightMultiCoreSharded::override_runtime_a
     auto all_cores = shard_spec.grid;
     uint32_t ncores = shard_spec.num_cores();
     uint32_t Wt = 0, Ht = 0;
-    const auto ashape = input_tensor_a.get_legacy_shape();
+    const auto ashape = input_tensor_a.get_padded_shape();
     uint32_t N = ashape[0], C = ashape[1], H = ashape[2], W = ashape[3];
-    uint32_t bN = input_tensor_b->get_legacy_shape()[0];
+    uint32_t bN = input_tensor_b->get_padded_shape()[0];
     uint32_t NC = N * C;
     if (a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED) {
         Wt = shard_spec.shape[1] / TILE_WIDTH;
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp
index 3ab77f8fab6f..9aabfe7ed445 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_width_multi_core_program_factory.cpp
@@ -40,8 +40,8 @@ BinaryDeviceOperation::BroadcastWidthMultiCore::cached_program_t BinaryDeviceOpe
     auto& output = tensor_return_value;
     auto bcast_math = binary_op_type_to_bcast_op_math(operation_attributes.binary_op_type);
 
-    const auto ashape = a.get_legacy_shape();
-    const auto bshape = b->get_legacy_shape();
+    const auto ashape = a.get_padded_shape();
+    const auto bshape = b->get_padded_shape();
     uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
     uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
     uint32_t H = ashape[-2];
@@ -240,8 +240,8 @@ void BinaryDeviceOperation::BroadcastWidthMultiCore::override_runtime_arguments(
 
     auto dst_dram_buffer = output_tensor.buffer();
 
-    const auto ashape = input_tensor_a.get_legacy_shape();
-    const auto bshape = input_tensor_b->get_legacy_shape();
+    const auto ashape = input_tensor_a.get_padded_shape();
+    const auto bshape = input_tensor_b->get_padded_shape();
     uint32_t N = ashape.rank() >= 4 ? ashape[-4] : 1;
     uint32_t C = ashape.rank() >= 3 ? ashape[-3] : 1;
     uint32_t H = ashape[-2];
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp
index ceef285e7172..2cec32083e39 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp
@@ -97,8 +97,8 @@ inline __attribute__((always_inline)) void set_eltwise_binary_runtime_args(
         if (block_or_width_sharded) {
             block_size = block_width * block_height;
             end_core = (*shard_spec.value().grid.ranges().begin()).end_coord;
-            output_width = output.get_legacy_shape()[-1] / TILE_WIDTH;
-            uint32_t output_height = output.volume() / output.get_legacy_shape()[-1] / TILE_HEIGHT;
+            output_width = output.get_padded_shape()[-1] / TILE_WIDTH;
+            uint32_t output_height = output.volume() / output.get_padded_shape()[-1] / TILE_HEIGHT;
             last_unpadded_block_height = block_height - (round_up(output_height, block_height) - output_height);
             last_unpadded_block_width = block_width - (round_up(output_width, block_width) - output_width);
         }
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary_backward/binary_backward.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary_backward/binary_backward.cpp
index 0d647f99577a..c5421ddfe289 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary_backward/binary_backward.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary_backward/binary_backward.cpp
@@ -632,10 +632,10 @@ std::vector<std::optional<Tensor>> ExecuteBackwardConcat::invoke(
     if (are_required_outputs[0]) {
         ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
         ttnn::SmallVector<uint32_t> end_index = {
-            input.get_legacy_shape()[0],
-            input.get_legacy_shape()[1],
-            input.get_legacy_shape()[2],
-            input.get_legacy_shape()[3]};
+            input.get_padded_shape()[0],
+            input.get_padded_shape()[1],
+            input.get_padded_shape()[2],
+            input.get_padded_shape()[3]};
         ttnn::SmallVector<uint32_t> step = {1, 1, 1, 1};
         ttnn::slice(queue_id, grad, start_index, end_index, step, std::nullopt, input_grad);
         grad_tensor[0] = input_grad;
@@ -644,19 +644,19 @@ std::vector<std::optional<Tensor>> ExecuteBackwardConcat::invoke(
     if (are_required_outputs[1]) {
         ttnn::SmallVector<uint32_t> start_index_2 = {0, 0, 0, 0};
         if (dim == 0) {
-            start_index_2 = {input.get_legacy_shape()[0], 0, 0, 0};
+            start_index_2 = {input.get_padded_shape()[0], 0, 0, 0};
         } else if (dim == 1) {
-            start_index_2 = {0, input.get_legacy_shape()[1], 0, 0};
+            start_index_2 = {0, input.get_padded_shape()[1], 0, 0};
         } else if (dim == 2) {
-            start_index_2 = {0, 0, input.get_legacy_shape()[2], 0};
+            start_index_2 = {0, 0, input.get_padded_shape()[2], 0};
         } else if (dim == 3) {
-            start_index_2 = {0, 0, 0, input.get_legacy_shape()[3]};
+            start_index_2 = {0, 0, 0, input.get_padded_shape()[3]};
         }
         ttnn::SmallVector<uint32_t> end_index_2 = {
-            grad.get_legacy_shape()[0],
-            grad.get_legacy_shape()[1],
-            grad.get_legacy_shape()[2],
-            grad.get_legacy_shape()[3]};
+            grad.get_padded_shape()[0],
+            grad.get_padded_shape()[1],
+            grad.get_padded_shape()[2],
+            grad.get_padded_shape()[3]};
         ttnn::SmallVector<uint32_t> step_2 = {1, 1, 1, 1};
         ttnn::slice(queue_id, grad, start_index_2, end_index_2, step_2, std::nullopt, other_grad);
         grad_tensor[1] = other_grad;
diff --git a/ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp b/ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp
index a8cde4ccf237..07fc6230f530 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/complex/complex.cpp
@@ -19,7 +19,7 @@ void ComplexTensor::deallocate() {
 }
 
 ComplexTensor CreateComplexTensor::invoke(const Tensor& real, const Tensor& imag) {
-    TT_ASSERT(real.get_legacy_shape() == imag.get_legacy_shape(), "Tensor shapes of real and imag should be identical");
+    TT_ASSERT(real.get_padded_shape() == imag.get_padded_shape(), "Tensor shapes of real and imag should be identical");
     return ComplexTensor({real, imag});
 }
 
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp
index a170e3d98558..74818d861fae 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp
@@ -507,7 +507,7 @@ Tensor _variance_impl(
     const std::optional<MemoryConfig>& output_mem_config) {
     ttnn::SmallVector<int> dims = {2, 3};
     constexpr float correction = 0.0f;
-    auto shape_wh = y.get_legacy_shape();
+    auto shape_wh = y.get_padded_shape();
     float scale = 1.0f / ((float)(shape_wh[3] * shape_wh[2]) - correction);
     Tensor sqr_y_minus_mean_y = ttnn::square(y_minus_mean_y, output_mem_config);
     return ttnn::sum(sqr_y_minus_mean_y, dims, true, std::nullopt, std::nullopt, scale);
@@ -705,10 +705,10 @@ Tensor ExecuteUnaryCompositeThreshold::invoke(
 std::vector<Tensor> split_tensor_for_glu(
     const Tensor& input_a, int32_t dim, const std::optional<MemoryConfig>& output_mem_config) {
     std::vector<Tensor> t_split;
-    tt::tt_metal::LegacyShape inshape(input_a.get_legacy_shape());
+    ttnn::SimpleShape inshape(input_a.get_padded_shape());
     TT_FATAL(((inshape[dim] / 2) % tt::constants::TILE_WIDTH == 0), "Split tensor dimension should be in full tile");
     ttnn::SmallVector<uint32_t> s_a = {0, 0, 0, 0};
-    ttnn::SmallVector<uint32_t> e_a = {input_a.get_legacy_shape()[0], inshape[1], inshape[2], inshape[3] / 2};
+    ttnn::SmallVector<uint32_t> e_a = {input_a.get_padded_shape()[0], inshape[1], inshape[2], inshape[3] / 2};
 
     ttnn::SmallVector<uint32_t> s_b = {0, 0, 0, inshape[3] / 2};
     ttnn::SmallVector<uint32_t> e_b = {inshape[0], inshape[1], inshape[2], inshape[3]};
diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp
index 21c5853eca81..b4d518bdb10c 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/unary_backward/unary_backward.cpp
@@ -1780,10 +1780,10 @@ std::vector<Tensor> ExecuteUnaryBackwardRepeat::invoke(
     auto output_memory_config = output_mem_config.value_or(
         input.memory_config());  // TODO: Remove after ternary forward ops migration is completed
 
-    auto shape_wh = input.get_legacy_shape();
+    auto shape_wh = input.get_padded_shape();
     TT_FATAL(shape_wh[0] == 1 && "input shape[0] should be 1", "Error");
     auto ttnn_device = input.device();
-    // input.get_legacy_shape()[0]
+    // input.get_padded_shape()[0]
     // If repeat shape has 0's, it returns zeros of given input
     if (shape[0] == 0 || shape[1] == 0 || shape[2] == 0 || shape[3] == 0) {
         Tensor zero_tensor =
@@ -1880,7 +1880,7 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
             Tensor required = ttnn::permute(grad, after_permute_dims, output_memory_config);
             ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
             ttnn::SmallVector<uint32_t> end_index = {
-                grad.get_legacy_shape()[0], 1, grad.get_legacy_shape()[1], grad.get_legacy_shape()[2]};
+                grad.get_padded_shape()[0], 1, grad.get_padded_shape()[1], grad.get_padded_shape()[2]};
             Tensor new_slice_tensor = ttnn::slice(DefaultQueueId, required, start_index, end_index, step, std::nullopt);
             after_permute_dims = {0, 2, 3, 1};
             updated_grad = ttnn::permute(new_slice_tensor, after_permute_dims, output_memory_config);
@@ -1895,7 +1895,7 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
             Tensor required = ttnn::permute(grad, after_permute_dims, output_memory_config);
             ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
             ttnn::SmallVector<uint32_t> end_index = {
-                grad.get_legacy_shape()[0], 1, grad.get_legacy_shape()[1], grad.get_legacy_shape()[3]};
+                grad.get_padded_shape()[0], 1, grad.get_padded_shape()[1], grad.get_padded_shape()[3]};
             Tensor new_slice_tensor = ttnn::slice(DefaultQueueId, required, start_index, end_index, step, std::nullopt);
             updated_grad = ttnn::permute(new_slice_tensor, after_permute_dims, output_memory_config);
             if (updated_grad.get_layout() == Layout::ROW_MAJOR) {
@@ -1925,9 +1925,9 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
         return grad_tensor;
     } else if (dim == 1 || dim == -3) {
         Tensor tensor_1_temp = reciprocal_input;
-        if (reciprocal_input.get_legacy_shape()[1] % 32 != 0) {
+        if (reciprocal_input.get_padded_shape()[1] % 32 != 0) {
             ttnn::SmallVector<std::pair<uint32_t, uint32_t>> padding = {
-                {0, 0}, {0, 32 - (reciprocal_input.get_legacy_shape()[1] % 32)}, {0, 0}, {0, 0}};
+                {0, 0}, {0, 32 - (reciprocal_input.get_padded_shape()[1] % 32)}, {0, 0}, {0, 0}};
             tensor_1_temp = ttnn::pad(0, reciprocal_input, padding, 0, true, std::nullopt);
         }
         ttnn::SmallVector<int64_t> after_permute_dims = {0, 2, 3, 1};
@@ -1945,13 +1945,13 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
             after_permute_dims,
             output_memory_config);
         Tensor grad_result = result;
-        if (reciprocal_input.get_legacy_shape()[1] % 32 != 0) {
+        if (reciprocal_input.get_padded_shape()[1] % 32 != 0) {
             ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
             ttnn::SmallVector<uint32_t> end_index = {
-                input.get_legacy_shape()[0],
-                input.get_legacy_shape()[1],
-                input.get_legacy_shape()[2],
-                input.get_legacy_shape()[3]};
+                input.get_padded_shape()[0],
+                input.get_padded_shape()[1],
+                input.get_padded_shape()[2],
+                input.get_padded_shape()[3]};
             auto step = ttnn::SmallVector<uint32_t>({1, 1, 1, 1});
             grad_result = ttnn::slice(DefaultQueueId, result, start_index, end_index, step, std::nullopt);
         }
@@ -1960,9 +1960,9 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
     }
     // dim 0
     Tensor tensor_1_temp = reciprocal_input;
-    if (reciprocal_input.get_legacy_shape()[0] % 32 != 0) {
+    if (reciprocal_input.get_padded_shape()[0] % 32 != 0) {
         ttnn::SmallVector<std::pair<uint32_t, uint32_t>> padding = {
-            {0, (32 - (reciprocal_input.get_legacy_shape()[0] % 32))}, {0, 0}, {0, 0}, {0, 0}};
+            {0, (32 - (reciprocal_input.get_padded_shape()[0] % 32))}, {0, 0}, {0, 0}, {0, 0}};
         tensor_1_temp = ttnn::pad(0, reciprocal_input, padding, 0, false, std::nullopt);
     }
     ttnn::SmallVector<int64_t> after_permute_dims = {3, 1, 2, 0};
@@ -1979,13 +1979,13 @@ std::vector<Tensor> ExecuteUnaryBackwardProd::invoke(
         after_permute_dims,
         output_memory_config);
     Tensor grad_result = result;
-    if (reciprocal_input.get_legacy_shape()[0] % 32 != 0) {
+    if (reciprocal_input.get_padded_shape()[0] % 32 != 0) {
         ttnn::SmallVector<uint32_t> start_index = {0, 0, 0, 0};
         ttnn::SmallVector<uint32_t> end_index = {
-            input.get_legacy_shape()[0],
-            input.get_legacy_shape()[1],
-            input.get_legacy_shape()[2],
-            input.get_legacy_shape()[3]};
+            input.get_padded_shape()[0],
+            input.get_padded_shape()[1],
+            input.get_padded_shape()[2],
+            input.get_padded_shape()[3]};
         grad_result = ttnn::slice(DefaultQueueId, result, start_index, end_index, step, std::nullopt);
     }
     grad_tensor.emplace_back(grad_result);