#10505: migrate prod

tenstorrent · Jul 23, 2024 · de5eee5 · de5eee5
1 parent 032372c
commit de5eee5
Show file tree

Hide file tree

Showing 21 changed files with 254 additions and 147 deletions.
diff --git a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
@@ -1620,7 +1620,7 @@ def prod(
     **kwargs,
 ):
     t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
-    t1 = ttl.tensor.prod(t0, all_dimensions, dim, output_mem_config=output_mem_config)
+    t1 = ttnn.prod(t0, all_dimensions, dim, memory_config=output_mem_config)
     output_tensor = ttnn.from_device(t1)
     output_tensor = ttnn.to_layout(output_tensor, ttnn.ROW_MAJOR_LAYOUT)
     output_tensor = ttnn.to_torch(output_tensor)

diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt
@@ -67,6 +67,11 @@ set(TTNN_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/prod.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/prod_nc_op.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/prod_op_all.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/prod_nc/prod_nc_program_factory.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/single_core/prod_all_program_factory.cpp
 )
 
 ### Setup TTNN as a shared library with optional Python bindings

diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/CMakeLists.txt b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/CMakeLists.txt
@@ -85,10 +85,6 @@ set(TT_DNN_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/moreh_sum/moreh_sum_nc_impl/moreh_int_sum_nc_impl.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/moreh_sum/moreh_sum_op.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/moreh_sum_backward/moreh_sum_backward_impl/moreh_sum_backward_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/prod/prod_nc/prod_nc.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/prod/prod_nc_op.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/prod/prod_op_all.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/prod/single_core/prod_op_all_single_core.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/moreh_sum_backward/moreh_sum_backward_op.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/moreh_mean/moreh_mean_h/moreh_mean_h.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/moreh_mean/moreh_mean_w/moreh_mean_w.cpp

diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp
@@ -9,8 +9,6 @@
 #include "ttnn/deprecated/tt_dnn/op_library/copy/copy_op.hpp"
 #include "ttnn/deprecated/tt_dnn/op_library/math.hpp"
 #include "ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.hpp"
-#include "ttnn/deprecated/tt_dnn/op_library/prod/prod_nc_op.hpp"
-#include "ttnn/deprecated/tt_dnn/op_library/prod/prod_op_all.hpp"
 #include "ttnn/deprecated/tt_dnn/op_library/reduce/reduce_op.hpp"
 #include "ttnn/deprecated/tt_dnn/op_library/reshape/reshape_op.hpp"
 #include "ttnn/tensor/tensor_impl.hpp"
@@ -1283,95 +1281,6 @@ Tensor celu(const Tensor& input_a, float alpha, const MemoryConfig& output_mem_c
     return operation::decorate_as_composite(__func__, _celu)(input_a, alpha, output_mem_config);
 }
 
-Tensor prod_all(const Tensor& input_a, const MemoryConfig& output_mem_config) {
-    auto formatted_input_tensor = input_a;
-    if (formatted_input_tensor.get_layout() == Layout::ROW_MAJOR) {
-        auto a_pad_shape = AutoFormat::pad_to_tile_shape(input_a.get_legacy_shape(), false, false, true, true);
-        auto out_shape = input_a.get_legacy_shape();
-        out_shape = {out_shape[0], out_shape[1], out_shape[2], out_shape[3]};
-        if (!AutoFormat::check_input_tensor_format(input_a, a_pad_shape)) {
-            formatted_input_tensor =
-                AutoFormat::format_input_tensor(input_a, input_a.device(), a_pad_shape, 1.0, Layout::TILE);
-        }
-    }
-    return tt::operations::primary::prod_all(formatted_input_tensor, output_mem_config);
-}
-
-Tensor prod_nc(const Tensor& temp, int64_t dim, const MemoryConfig& output_mem_config) {
-    // layout conversion
-    auto formatted_input_tensor = temp;
-    if (formatted_input_tensor.get_layout() == Layout::ROW_MAJOR) {
-        auto a_pad_shape = AutoFormat::pad_to_tile_shape(temp.get_legacy_shape(), false, false, true, true);
-        auto out_shape = temp.get_legacy_shape();
-        out_shape = {out_shape[0], out_shape[1], out_shape[2], out_shape[3]};
-        if (!AutoFormat::check_input_tensor_format(temp, a_pad_shape)) {
-            formatted_input_tensor =
-                AutoFormat::format_input_tensor(temp, temp.device(), a_pad_shape, 1.0, Layout::TILE);
-        }
-    }
-    // Apply prod
-    std::vector<int64_t> dimension = {(dim == 1 || dim == -3) ? 1 : 0};
-    Shape input_shape = formatted_input_tensor.get_legacy_shape();
-    Shape required = {
-        ((dim == 1 || dim == -3) ? input_shape[0] : 1),
-        ((dim == 1 || dim == -3) ? 1 : input_shape[1]),
-        input_shape[2],
-        input_shape[3]};
-    return tt::operations::primary::prod_nc(
-        formatted_input_tensor,
-        zeros(
-            required,
-            formatted_input_tensor.get_dtype(),
-            formatted_input_tensor.get_layout(),
-            formatted_input_tensor.device(),
-            output_mem_config),
-        dimension,
-        output_mem_config);
-}
-
-Tensor _prod(const Tensor& input_a, bool all_dimensions, int64_t dim, const MemoryConfig& output_mem_config) {
-    if (all_dimensions) {
-        return tt::tt_metal::prod_all(input_a, output_mem_config);
-    }
-    TT_FATAL(dim >= -4 && dim <= 3 && "Dimension out of range (expected to be in range of [-4, 3]");
-    Tensor temp = input_a;
-    // Permute for dim 2,3
-    if (dim == 2 || dim == -2) {
-        std::vector<int64_t> permute_dims = {2, 0, 1, 3};
-        temp = ttnn::permute(input_a, permute_dims, output_mem_config);
-    } else if (dim == 3 || dim == -1) {
-        std::vector<int64_t> permute_dims = {3, 0, 1, 2};
-        temp = ttnn::permute(input_a, permute_dims, output_mem_config);
-    }
-    Tensor result = tt::tt_metal::prod_nc(temp, dim, output_mem_config);
-    // Permute and unpad result for dim 2,3
-    if (dim == 0 || dim == 1 || dim == -4 || dim == -3) {
-        return result;
-    } else if (dim == 2 || dim == -2) {
-        std::vector<int64_t> after_permute_dims = {1, 2, 0, 3};
-        Tensor required = ttnn::permute(result, after_permute_dims, output_mem_config);
-        Shape input_shape = input_a.get_legacy_shape();
-        std::vector<uint32_t> start_index = {0, 0, 0, 0};
-        std::vector<uint32_t> end_index = {input_shape[0] - 1, input_shape[1] - 1, 0, input_shape[3] - 1};
-        return ttnn::slice(0, required, start_index, end_index, std::nullopt);
-    } else {  // dim 3
-        // permute
-        std::vector<int64_t> after_permute_dims = {1, 2, 0, 3};
-        Tensor required = ttnn::permute(result, after_permute_dims, output_mem_config);
-        // unpad
-        Shape input_shape = input_a.get_legacy_shape();
-        std::vector<uint32_t> start_index = {0, 0, 0, 0};
-        std::vector<uint32_t> end_index = {input_shape[0] - 1, input_shape[1] - 1, 0, input_shape[2] - 1};
-        Tensor new_unpad_tensor = ttnn::slice(0, required, start_index, end_index, std::nullopt);
-        // permute back
-        after_permute_dims = {0, 1, 3, 2};
-        return ttnn::permute(new_unpad_tensor, after_permute_dims, output_mem_config);
-    }
-}
-
-Tensor prod(const Tensor& input_a, bool all_dimensions, int64_t dim, const MemoryConfig& output_mem_config) {
-    return operation::decorate_as_composite(__func__, _prod)(input_a, all_dimensions, dim, output_mem_config);
-}
 
 Tensor _variance_impl(
     const Tensor& y, const Tensor& mean_y, Tensor& y_minus_mean_y, const MemoryConfig& output_mem_config) {

diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp
@@ -236,12 +236,6 @@ Tensor logical_noti(
     float immediate,
     const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
 
-// prod
-Tensor prod(
-    const Tensor& input_a,
-    bool all_dimensions = false,
-    int64_t dim = 0,
-    const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
 
 /*
 Returns a new tensor with the signed angles in radians between vectors

diff --git a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/operations/primary/module.hpp b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/operations/primary/module.hpp
@@ -39,8 +39,8 @@
 #include "ttnn/deprecated/tt_dnn/op_library/moreh_softmax_backward/moreh_softmax_backward_op.hpp"
 #include "ttnn/deprecated/tt_dnn/op_library/moreh_sum/moreh_sum_op.hpp"
 #include "ttnn/deprecated/tt_dnn/op_library/moreh_sum_backward/moreh_sum_backward_op.hpp"
-#include "ttnn/deprecated/tt_dnn/op_library/prod/prod_nc_op.hpp"
-#include "ttnn/deprecated/tt_dnn/op_library/prod/prod_op_all.hpp"
+#include "ttnn/operations/eltwise/prod/prod_nc_op.hpp"
+#include "ttnn/operations/eltwise/prod/prod_op_all.hpp"
 
 namespace py = pybind11;
 

diff --git a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp
@@ -336,29 +336,6 @@ void TensorModuleCompositeOPs(py::module& m_tensor) {
         py::arg("dim") = -1,
         R"doc(Applies the Gated Linear Units (GLU) function to the elements of the input tensor ``{0}`` split along dim ``{1}``.)doc",
         R"doc(dimension to split)doc");
-    m_tensor.def(
-        "prod",
-        &prod,
-        py::arg("input").noconvert(),
-        py::arg("all_dimensions") = false,
-        py::arg("dim") = 0,
-        py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
-        R"doc(
-            Computes the prod function along specified ``dim`` or all dimensions on the ``input`` tensor.
-            If ``all_dimensions`` is set to ``true`` irrespective of given dimension it will prod along all dimensions.
-
-            Input tensor must have BFLOAT16 data type.
-
-            Output tensor will have BFLOAT16 data type.
-
-            .. csv-table::
-                :header: "Argument", "Description", "Data type", "Valid range", "Required"
-
-                "input", "Tensor prod is applied to", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes"
-                "all_dimensions", "Consider all dimension (ignores ``dim`` param)", "bool", "default to false", "No"
-                "dim", "Dimension to perform prod", "int", "default to 0", "Yes"
-                "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No"
-        )doc");
     detail::bind_unary_op_with_param(
         m_tensor,
         "geglu",

diff --git a/...library/prod/kernels/compute/prod_all.cpp → .../prod/device/kernels/compute/prod_all.cpp b/...library/prod/kernels/compute/prod_all.cpp → .../prod/device/kernels/compute/prod_all.cpp
diff --git a/..._library/prod/kernels/compute/prod_nc.cpp → ...e/prod/device/kernels/compute/prod_nc.cpp b/..._library/prod/kernels/compute/prod_nc.cpp → ...e/prod/device/kernels/compute/prod_nc.cpp
diff --git a/.../prod/kernels/dataflow/reader_prod_nc.cpp → ...evice/kernels/dataflow/reader_prod_nc.cpp b/.../prod/kernels/dataflow/reader_prod_nc.cpp → ...evice/kernels/dataflow/reader_prod_nc.cpp
diff --git a/...p_library/prod/kernels/dataflow/utils.hpp → ...se/prod/device/kernels/dataflow/utils.hpp b/...p_library/prod/kernels/dataflow/utils.hpp → ...se/prod/device/kernels/dataflow/utils.hpp
diff --git a/ttnn/cpp/ttnn/operations/eltwise/prod/prod.cpp b/ttnn/cpp/ttnn/operations/eltwise/prod/prod.cpp
@@ -0,0 +1,135 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+
+#include "prod.hpp"
+#include "prod_nc_op.hpp"
+#include "prod_op_all.hpp"
+#include "ttnn/deprecated/tt_dnn/op_library/auto_format.hpp"
+#include "ttnn/cpp/ttnn/operations/creation.hpp"
+#include "ttnn/operations/data_movement/slice/slice.hpp"
+#include "ttnn/operations/data_movement/permute/permute.hpp"
+#include "tt_numpy/functions.hpp"
+
+
+namespace ttnn {
+
+namespace operations {
+
+namespace prod {
+
+inline Tensor zeros(
+    const tt::tt_metal::Shape shape, DataType data_type, Layout layout, Device* device, const MemoryConfig& output_mem_config) {
+    return tt::numpy::zeros(shape, data_type, layout, device, output_mem_config);
+}
+
+// Autoformat support
+inline Tensor change_layout_to_tile(const Tensor& temp, const MemoryConfig& output_mem_config) {
+    auto formatted_input_tensor = temp;
+    if(formatted_input_tensor.get_layout()==Layout::ROW_MAJOR){
+        auto a_pad_shape = AutoFormat::pad_to_tile_shape(temp.get_legacy_shape(), false, false, true, true);
+        if (!AutoFormat::check_input_tensor_format(temp, a_pad_shape)) {
+            formatted_input_tensor = AutoFormat::format_input_tensor(temp, temp.device(), a_pad_shape, 1.0, Layout::TILE);
+        }
+    }
+    return formatted_input_tensor;
+}
+
+inline Tensor prod_all(const Tensor& input_a, const MemoryConfig& output_mem_config) {
+    auto formatted_input_tensor = input_a;
+    if (formatted_input_tensor.get_layout() == Layout::ROW_MAJOR) {
+        auto a_pad_shape = AutoFormat::pad_to_tile_shape(input_a.get_legacy_shape(), false, false, true, true);
+        auto out_shape = input_a.get_legacy_shape();
+        out_shape = {out_shape[0], out_shape[1], out_shape[2], out_shape[3]};
+        if (!AutoFormat::check_input_tensor_format(input_a, a_pad_shape)) {
+            formatted_input_tensor =
+                AutoFormat::format_input_tensor(input_a, input_a.device(), a_pad_shape, 1.0, Layout::TILE);
+        }
+    }
+    return tt::operations::primary::prod_all(formatted_input_tensor, output_mem_config);
+}
+
+inline Tensor prod_nc(const Tensor& temp, int64_t dim, const MemoryConfig& output_mem_config) {
+    // layout conversion
+    auto formatted_input_tensor = temp;
+    if (formatted_input_tensor.get_layout() == Layout::ROW_MAJOR) {
+        auto a_pad_shape = AutoFormat::pad_to_tile_shape(temp.get_legacy_shape(), false, false, true, true);
+        auto out_shape = temp.get_legacy_shape();
+        out_shape = {out_shape[0], out_shape[1], out_shape[2], out_shape[3]};
+        if (!AutoFormat::check_input_tensor_format(temp, a_pad_shape)) {
+            formatted_input_tensor =
+                AutoFormat::format_input_tensor(temp, temp.device(), a_pad_shape, 1.0, Layout::TILE);
+        }
+    }
+    // Apply prod
+    std::vector<int64_t> dimension = {(dim == 1 || dim == -3) ? 1 : 0};
+    tt::tt_metal::Shape input_shape = formatted_input_tensor.get_legacy_shape();
+    tt::tt_metal::Shape required = {
+        ((dim == 1 || dim == -3) ? input_shape[0] : 1),
+        ((dim == 1 || dim == -3) ? 1 : input_shape[1]),
+        input_shape[2],
+        input_shape[3]};
+    return tt::operations::primary::prod_nc(
+        formatted_input_tensor,
+        zeros(
+            required,
+            formatted_input_tensor.get_dtype(),
+            formatted_input_tensor.get_layout(),
+            formatted_input_tensor.device(),
+            output_mem_config),
+        dimension,
+        output_mem_config);
+}
+
+
+Tensor ProdOp::_prod(const Tensor& input_a, bool all_dimensions, int64_t dim, const MemoryConfig& output_mem_config) {
+    if (all_dimensions) {
+        return prod_all(input_a, output_mem_config);
+    }
+    TT_FATAL(dim >= -4 && dim <= 3 && "Dimension out of range (expected to be in range of [-4, 3]");
+    Tensor temp = input_a;
+    // Permute for dim 2,3
+    if (dim == 2 || dim == -2) {
+        std::vector<int64_t> permute_dims = {2, 0, 1, 3};
+        temp = ttnn::permute(input_a, permute_dims, output_mem_config);
+    } else if (dim == 3 || dim == -1) {
+        std::vector<int64_t> permute_dims = {3, 0, 1, 2};
+        temp = ttnn::permute(input_a, permute_dims, output_mem_config);
+    }
+    Tensor result = prod_nc(temp, dim, output_mem_config);
+    // Permute and unpad result for dim 2,3
+    if (dim == 0 || dim == 1 || dim == -4 || dim == -3) {
+        return result;
+    } else if (dim == 2 || dim == -2) {
+        std::vector<int64_t> after_permute_dims = {1, 2, 0, 3};
+        Tensor required = ttnn::permute(result, after_permute_dims, output_mem_config);
+        tt::tt_metal::Shape input_shape = input_a.get_legacy_shape();
+        std::vector<uint32_t> start_index = {0, 0, 0, 0};
+        std::vector<uint32_t> end_index = {input_shape[0] - 1, input_shape[1] - 1, 0, input_shape[3] - 1};
+        return ttnn::slice(0, required, start_index, end_index, std::nullopt);
+    } else {  // dim 3
+        // permute
+        std::vector<int64_t> after_permute_dims = {1, 2, 0, 3};
+        Tensor required = ttnn::permute(result, after_permute_dims, output_mem_config);
+        // unpad
+        tt::tt_metal::Shape input_shape = input_a.get_legacy_shape();
+        std::vector<uint32_t> start_index = {0, 0, 0, 0};
+        std::vector<uint32_t> end_index = {input_shape[0] - 1, input_shape[1] - 1, 0, input_shape[2] - 1};
+        Tensor new_unpad_tensor = ttnn::slice(0, required, start_index, end_index, std::nullopt);
+        // permute back
+        after_permute_dims = {0, 1, 3, 2};
+        Tensor res_host = ttnn::permute(new_unpad_tensor, after_permute_dims, output_mem_config);
+        if(res_host.storage_type() != StorageType::DEVICE or res_host.storage_type() != StorageType::MULTI_DEVICE) {
+            res_host = res_host.pad_to_tile(0.0f);
+            res_host = res_host.to(Layout::TILE);
+            res_host = res_host.to(input_a.device());
+        }
+        return res_host;
+    }
+}
+
+
+} // namespace prod
+} // namespace operations
+} // namespace ttnn
diff --git a/ttnn/cpp/ttnn/operations/eltwise/prod/prod.hpp b/ttnn/cpp/ttnn/operations/eltwise/prod/prod.hpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <optional>
+#include <functional>
+
+#include "ttnn/decorators.hpp"
+#include "ttnn/operations/core/core.hpp"
+
+namespace ttnn {
+
+namespace operations {
+
+namespace prod {
+
+    struct ProdOp {
+        static Tensor _prod(
+            const Tensor& input_a,
+            bool all_dimensions,
+            int64_t dim,
+            const MemoryConfig& output_mem_config);
+    };
+
+
+    struct ExecuteProdOp {
+        static Tensor execute_on_worker_thread(
+            const Tensor& input,
+            bool all_dimensions = false,
+            int64_t dim = 0,
+            const std::optional<MemoryConfig>& memory_config = std::nullopt) {
+
+            return ProdOp::_prod(input, all_dimensions, dim, memory_config.value_or(input.memory_config()));
+        }
+    };
+
+} // namespace prod
+} // namespace operations
+
+constexpr auto prod = ttnn::register_operation<operations::prod::ExecuteProdOp>("ttnn::prod");
+
+} // namespace ttnn
diff --git a/...t_dnn/op_library/prod/prod_nc/prod_nc.cpp → .../prod/prod_nc/prod_nc_program_factory.cpp b/...t_dnn/op_library/prod/prod_nc/prod_nc.cpp → .../prod/prod_nc/prod_nc_program_factory.cpp
@@ -97,7 +97,7 @@ operation::ProgramWithCallbacks prod_nc_format(const Tensor &input, const Tensor
     bool output_is_dram = output_buffer_type->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0;
     std::vector<uint32_t> writer_compile_time_args = {(std::uint32_t) cb_id_out, (std::uint32_t) output_is_dram};
 
-    const auto reader_kernel_file = "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/prod/kernels/dataflow/reader_prod_nc.cpp";
+    const auto reader_kernel_file = "ttnn/cpp/ttnn/operations/eltwise/prod/device/kernels/dataflow/reader_prod_nc.cpp";
     const auto writer_kernel_file = "ttnn/cpp/ttnn/operations/eltwise/unary/device/kernels/dataflow/writer_unary_interleaved_start_id.cpp";
     const auto reader_kernel_id = CreateReadKernel(program, reader_kernel_file, all_cores, reader_compile_time_args);
     const auto writer_kernel_id = CreateWriteKernel(program, writer_kernel_file, all_cores, writer_compile_time_args);
@@ -107,7 +107,8 @@ operation::ProgramWithCallbacks prod_nc_format(const Tensor &input, const Tensor
     ////////////////////////////////////////////////////////////////////////////
     const std::vector<uint32_t> compute_args_group_1{num_cols_per_core_group_1};
     std::map<string, string> compute_defines;
-    const auto compute_kernel_file = "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/prod/kernels/compute/prod_nc.cpp";
+
+    const auto compute_kernel_file = "ttnn/cpp/ttnn/operations/eltwise/prod/device/kernels/compute/prod_nc.cpp";
     const auto compute_kernel_1_id = CreateComputeKernel(
         program, compute_kernel_file, {core_group_1, num_cols_per_core_group_1, compute_args_group_1}, compute_defines);