Skip to content

Commit

Permalink
#10505: migrate prod
Browse files Browse the repository at this point in the history
  • Loading branch information
KalaivaniMCW committed Jul 23, 2024
1 parent 032372c commit de5eee5
Show file tree
Hide file tree
Showing 21 changed files with 254 additions and 147 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1620,7 +1620,7 @@ def prod(
**kwargs,
):
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = ttl.tensor.prod(t0, all_dimensions, dim, output_mem_config=output_mem_config)
t1 = ttnn.prod(t0, all_dimensions, dim, memory_config=output_mem_config)
output_tensor = ttnn.from_device(t1)
output_tensor = ttnn.to_layout(output_tensor, ttnn.ROW_MAJOR_LAYOUT)
output_tensor = ttnn.to_torch(output_tensor)
Expand Down
5 changes: 5 additions & 0 deletions ttnn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ set(TTNN_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/prod.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/prod_nc_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/prod_op_all.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/prod_nc/prod_nc_program_factory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/prod/single_core/prod_all_program_factory.cpp
)

### Setup TTNN as a shared library with optional Python bindings
Expand Down
4 changes: 0 additions & 4 deletions ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,6 @@ set(TT_DNN_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/moreh_sum/moreh_sum_nc_impl/moreh_int_sum_nc_impl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/moreh_sum/moreh_sum_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/moreh_sum_backward/moreh_sum_backward_impl/moreh_sum_backward_impl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/prod/prod_nc/prod_nc.cpp
${CMAKE_CURRENT_SOURCE_DIR}/prod/prod_nc_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/prod/prod_op_all.cpp
${CMAKE_CURRENT_SOURCE_DIR}/prod/single_core/prod_op_all_single_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/moreh_sum_backward/moreh_sum_backward_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/moreh_mean/moreh_mean_h/moreh_mean_h.cpp
${CMAKE_CURRENT_SOURCE_DIR}/moreh_mean/moreh_mean_w/moreh_mean_w.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
#include "ttnn/deprecated/tt_dnn/op_library/copy/copy_op.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/math.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/prod/prod_nc_op.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/prod/prod_op_all.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/reduce/reduce_op.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/reshape/reshape_op.hpp"
#include "ttnn/tensor/tensor_impl.hpp"
Expand Down Expand Up @@ -1283,95 +1281,6 @@ Tensor celu(const Tensor& input_a, float alpha, const MemoryConfig& output_mem_c
return operation::decorate_as_composite(__func__, _celu)(input_a, alpha, output_mem_config);
}

Tensor prod_all(const Tensor& input_a, const MemoryConfig& output_mem_config) {
auto formatted_input_tensor = input_a;
if (formatted_input_tensor.get_layout() == Layout::ROW_MAJOR) {
auto a_pad_shape = AutoFormat::pad_to_tile_shape(input_a.get_legacy_shape(), false, false, true, true);
auto out_shape = input_a.get_legacy_shape();
out_shape = {out_shape[0], out_shape[1], out_shape[2], out_shape[3]};
if (!AutoFormat::check_input_tensor_format(input_a, a_pad_shape)) {
formatted_input_tensor =
AutoFormat::format_input_tensor(input_a, input_a.device(), a_pad_shape, 1.0, Layout::TILE);
}
}
return tt::operations::primary::prod_all(formatted_input_tensor, output_mem_config);
}

Tensor prod_nc(const Tensor& temp, int64_t dim, const MemoryConfig& output_mem_config) {
// layout conversion
auto formatted_input_tensor = temp;
if (formatted_input_tensor.get_layout() == Layout::ROW_MAJOR) {
auto a_pad_shape = AutoFormat::pad_to_tile_shape(temp.get_legacy_shape(), false, false, true, true);
auto out_shape = temp.get_legacy_shape();
out_shape = {out_shape[0], out_shape[1], out_shape[2], out_shape[3]};
if (!AutoFormat::check_input_tensor_format(temp, a_pad_shape)) {
formatted_input_tensor =
AutoFormat::format_input_tensor(temp, temp.device(), a_pad_shape, 1.0, Layout::TILE);
}
}
// Apply prod
std::vector<int64_t> dimension = {(dim == 1 || dim == -3) ? 1 : 0};
Shape input_shape = formatted_input_tensor.get_legacy_shape();
Shape required = {
((dim == 1 || dim == -3) ? input_shape[0] : 1),
((dim == 1 || dim == -3) ? 1 : input_shape[1]),
input_shape[2],
input_shape[3]};
return tt::operations::primary::prod_nc(
formatted_input_tensor,
zeros(
required,
formatted_input_tensor.get_dtype(),
formatted_input_tensor.get_layout(),
formatted_input_tensor.device(),
output_mem_config),
dimension,
output_mem_config);
}

Tensor _prod(const Tensor& input_a, bool all_dimensions, int64_t dim, const MemoryConfig& output_mem_config) {
if (all_dimensions) {
return tt::tt_metal::prod_all(input_a, output_mem_config);
}
TT_FATAL(dim >= -4 && dim <= 3 && "Dimension out of range (expected to be in range of [-4, 3]");
Tensor temp = input_a;
// Permute for dim 2,3
if (dim == 2 || dim == -2) {
std::vector<int64_t> permute_dims = {2, 0, 1, 3};
temp = ttnn::permute(input_a, permute_dims, output_mem_config);
} else if (dim == 3 || dim == -1) {
std::vector<int64_t> permute_dims = {3, 0, 1, 2};
temp = ttnn::permute(input_a, permute_dims, output_mem_config);
}
Tensor result = tt::tt_metal::prod_nc(temp, dim, output_mem_config);
// Permute and unpad result for dim 2,3
if (dim == 0 || dim == 1 || dim == -4 || dim == -3) {
return result;
} else if (dim == 2 || dim == -2) {
std::vector<int64_t> after_permute_dims = {1, 2, 0, 3};
Tensor required = ttnn::permute(result, after_permute_dims, output_mem_config);
Shape input_shape = input_a.get_legacy_shape();
std::vector<uint32_t> start_index = {0, 0, 0, 0};
std::vector<uint32_t> end_index = {input_shape[0] - 1, input_shape[1] - 1, 0, input_shape[3] - 1};
return ttnn::slice(0, required, start_index, end_index, std::nullopt);
} else { // dim 3
// permute
std::vector<int64_t> after_permute_dims = {1, 2, 0, 3};
Tensor required = ttnn::permute(result, after_permute_dims, output_mem_config);
// unpad
Shape input_shape = input_a.get_legacy_shape();
std::vector<uint32_t> start_index = {0, 0, 0, 0};
std::vector<uint32_t> end_index = {input_shape[0] - 1, input_shape[1] - 1, 0, input_shape[2] - 1};
Tensor new_unpad_tensor = ttnn::slice(0, required, start_index, end_index, std::nullopt);
// permute back
after_permute_dims = {0, 1, 3, 2};
return ttnn::permute(new_unpad_tensor, after_permute_dims, output_mem_config);
}
}

Tensor prod(const Tensor& input_a, bool all_dimensions, int64_t dim, const MemoryConfig& output_mem_config) {
return operation::decorate_as_composite(__func__, _prod)(input_a, all_dimensions, dim, output_mem_config);
}

Tensor _variance_impl(
const Tensor& y, const Tensor& mean_y, Tensor& y_minus_mean_y, const MemoryConfig& output_mem_config) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,12 +236,6 @@ Tensor logical_noti(
float immediate,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// prod
Tensor prod(
const Tensor& input_a,
bool all_dimensions = false,
int64_t dim = 0,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

/*
Returns a new tensor with the signed angles in radians between vectors
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@
#include "ttnn/deprecated/tt_dnn/op_library/moreh_softmax_backward/moreh_softmax_backward_op.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/moreh_sum/moreh_sum_op.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/moreh_sum_backward/moreh_sum_backward_op.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/prod/prod_nc_op.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/prod/prod_op_all.hpp"
#include "ttnn/operations/eltwise/prod/prod_nc_op.hpp"
#include "ttnn/operations/eltwise/prod/prod_op_all.hpp"

namespace py = pybind11;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,29 +336,6 @@ void TensorModuleCompositeOPs(py::module& m_tensor) {
py::arg("dim") = -1,
R"doc(Applies the Gated Linear Units (GLU) function to the elements of the input tensor ``{0}`` split along dim ``{1}``.)doc",
R"doc(dimension to split)doc");
m_tensor.def(
"prod",
&prod,
py::arg("input").noconvert(),
py::arg("all_dimensions") = false,
py::arg("dim") = 0,
py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
R"doc(
Computes the prod function along specified ``dim`` or all dimensions on the ``input`` tensor.
If ``all_dimensions`` is set to ``true`` irrespective of given dimension it will prod along all dimensions.
Input tensor must have BFLOAT16 data type.
Output tensor will have BFLOAT16 data type.
.. csv-table::
:header: "Argument", "Description", "Data type", "Valid range", "Required"
"input", "Tensor prod is applied to", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes"
"all_dimensions", "Consider all dimension (ignores ``dim`` param)", "bool", "default to false", "No"
"dim", "Dimension to perform prod", "int", "default to 0", "Yes"
"output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No"
)doc");
detail::bind_unary_op_with_param(
m_tensor,
"geglu",
Expand Down
135 changes: 135 additions & 0 deletions ttnn/cpp/ttnn/operations/eltwise/prod/prod.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0


#include "prod.hpp"
#include "prod_nc_op.hpp"
#include "prod_op_all.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/auto_format.hpp"
#include "ttnn/cpp/ttnn/operations/creation.hpp"
#include "ttnn/operations/data_movement/slice/slice.hpp"
#include "ttnn/operations/data_movement/permute/permute.hpp"
#include "tt_numpy/functions.hpp"


namespace ttnn {

namespace operations {

namespace prod {

inline Tensor zeros(
const tt::tt_metal::Shape shape, DataType data_type, Layout layout, Device* device, const MemoryConfig& output_mem_config) {
return tt::numpy::zeros(shape, data_type, layout, device, output_mem_config);
}

// Autoformat support
inline Tensor change_layout_to_tile(const Tensor& temp, const MemoryConfig& output_mem_config) {
auto formatted_input_tensor = temp;
if(formatted_input_tensor.get_layout()==Layout::ROW_MAJOR){
auto a_pad_shape = AutoFormat::pad_to_tile_shape(temp.get_legacy_shape(), false, false, true, true);
if (!AutoFormat::check_input_tensor_format(temp, a_pad_shape)) {
formatted_input_tensor = AutoFormat::format_input_tensor(temp, temp.device(), a_pad_shape, 1.0, Layout::TILE);
}
}
return formatted_input_tensor;
}

inline Tensor prod_all(const Tensor& input_a, const MemoryConfig& output_mem_config) {
auto formatted_input_tensor = input_a;
if (formatted_input_tensor.get_layout() == Layout::ROW_MAJOR) {
auto a_pad_shape = AutoFormat::pad_to_tile_shape(input_a.get_legacy_shape(), false, false, true, true);
auto out_shape = input_a.get_legacy_shape();
out_shape = {out_shape[0], out_shape[1], out_shape[2], out_shape[3]};
if (!AutoFormat::check_input_tensor_format(input_a, a_pad_shape)) {
formatted_input_tensor =
AutoFormat::format_input_tensor(input_a, input_a.device(), a_pad_shape, 1.0, Layout::TILE);
}
}
return tt::operations::primary::prod_all(formatted_input_tensor, output_mem_config);
}

inline Tensor prod_nc(const Tensor& temp, int64_t dim, const MemoryConfig& output_mem_config) {
// layout conversion
auto formatted_input_tensor = temp;
if (formatted_input_tensor.get_layout() == Layout::ROW_MAJOR) {
auto a_pad_shape = AutoFormat::pad_to_tile_shape(temp.get_legacy_shape(), false, false, true, true);
auto out_shape = temp.get_legacy_shape();
out_shape = {out_shape[0], out_shape[1], out_shape[2], out_shape[3]};
if (!AutoFormat::check_input_tensor_format(temp, a_pad_shape)) {
formatted_input_tensor =
AutoFormat::format_input_tensor(temp, temp.device(), a_pad_shape, 1.0, Layout::TILE);
}
}
// Apply prod
std::vector<int64_t> dimension = {(dim == 1 || dim == -3) ? 1 : 0};
tt::tt_metal::Shape input_shape = formatted_input_tensor.get_legacy_shape();
tt::tt_metal::Shape required = {
((dim == 1 || dim == -3) ? input_shape[0] : 1),
((dim == 1 || dim == -3) ? 1 : input_shape[1]),
input_shape[2],
input_shape[3]};
return tt::operations::primary::prod_nc(
formatted_input_tensor,
zeros(
required,
formatted_input_tensor.get_dtype(),
formatted_input_tensor.get_layout(),
formatted_input_tensor.device(),
output_mem_config),
dimension,
output_mem_config);
}


Tensor ProdOp::_prod(const Tensor& input_a, bool all_dimensions, int64_t dim, const MemoryConfig& output_mem_config) {
if (all_dimensions) {
return prod_all(input_a, output_mem_config);
}
TT_FATAL(dim >= -4 && dim <= 3 && "Dimension out of range (expected to be in range of [-4, 3]");
Tensor temp = input_a;
// Permute for dim 2,3
if (dim == 2 || dim == -2) {
std::vector<int64_t> permute_dims = {2, 0, 1, 3};
temp = ttnn::permute(input_a, permute_dims, output_mem_config);
} else if (dim == 3 || dim == -1) {
std::vector<int64_t> permute_dims = {3, 0, 1, 2};
temp = ttnn::permute(input_a, permute_dims, output_mem_config);
}
Tensor result = prod_nc(temp, dim, output_mem_config);
// Permute and unpad result for dim 2,3
if (dim == 0 || dim == 1 || dim == -4 || dim == -3) {
return result;
} else if (dim == 2 || dim == -2) {
std::vector<int64_t> after_permute_dims = {1, 2, 0, 3};
Tensor required = ttnn::permute(result, after_permute_dims, output_mem_config);
tt::tt_metal::Shape input_shape = input_a.get_legacy_shape();
std::vector<uint32_t> start_index = {0, 0, 0, 0};
std::vector<uint32_t> end_index = {input_shape[0] - 1, input_shape[1] - 1, 0, input_shape[3] - 1};
return ttnn::slice(0, required, start_index, end_index, std::nullopt);
} else { // dim 3
// permute
std::vector<int64_t> after_permute_dims = {1, 2, 0, 3};
Tensor required = ttnn::permute(result, after_permute_dims, output_mem_config);
// unpad
tt::tt_metal::Shape input_shape = input_a.get_legacy_shape();
std::vector<uint32_t> start_index = {0, 0, 0, 0};
std::vector<uint32_t> end_index = {input_shape[0] - 1, input_shape[1] - 1, 0, input_shape[2] - 1};
Tensor new_unpad_tensor = ttnn::slice(0, required, start_index, end_index, std::nullopt);
// permute back
after_permute_dims = {0, 1, 3, 2};
Tensor res_host = ttnn::permute(new_unpad_tensor, after_permute_dims, output_mem_config);
if(res_host.storage_type() != StorageType::DEVICE or res_host.storage_type() != StorageType::MULTI_DEVICE) {
res_host = res_host.pad_to_tile(0.0f);
res_host = res_host.to(Layout::TILE);
res_host = res_host.to(input_a.device());
}
return res_host;
}
}


} // namespace prod
} // namespace operations
} // namespace ttnn
44 changes: 44 additions & 0 deletions ttnn/cpp/ttnn/operations/eltwise/prod/prod.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <optional>
#include <functional>

#include "ttnn/decorators.hpp"
#include "ttnn/operations/core/core.hpp"

namespace ttnn {

namespace operations {

namespace prod {

struct ProdOp {
static Tensor _prod(
const Tensor& input_a,
bool all_dimensions,
int64_t dim,
const MemoryConfig& output_mem_config);
};


struct ExecuteProdOp {
static Tensor execute_on_worker_thread(
const Tensor& input,
bool all_dimensions = false,
int64_t dim = 0,
const std::optional<MemoryConfig>& memory_config = std::nullopt) {

return ProdOp::_prod(input, all_dimensions, dim, memory_config.value_or(input.memory_config()));
}
};

} // namespace prod
} // namespace operations

constexpr auto prod = ttnn::register_operation<operations::prod::ExecuteProdOp>("ttnn::prod");

} // namespace ttnn
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ operation::ProgramWithCallbacks prod_nc_format(const Tensor &input, const Tensor
bool output_is_dram = output_buffer_type->buffer_type() == tt_metal::BufferType::DRAM ? 1 : 0;
std::vector<uint32_t> writer_compile_time_args = {(std::uint32_t) cb_id_out, (std::uint32_t) output_is_dram};

const auto reader_kernel_file = "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/prod/kernels/dataflow/reader_prod_nc.cpp";
const auto reader_kernel_file = "ttnn/cpp/ttnn/operations/eltwise/prod/device/kernels/dataflow/reader_prod_nc.cpp";
const auto writer_kernel_file = "ttnn/cpp/ttnn/operations/eltwise/unary/device/kernels/dataflow/writer_unary_interleaved_start_id.cpp";
const auto reader_kernel_id = CreateReadKernel(program, reader_kernel_file, all_cores, reader_compile_time_args);
const auto writer_kernel_id = CreateWriteKernel(program, writer_kernel_file, all_cores, writer_compile_time_args);
Expand All @@ -107,7 +107,8 @@ operation::ProgramWithCallbacks prod_nc_format(const Tensor &input, const Tensor
////////////////////////////////////////////////////////////////////////////
const std::vector<uint32_t> compute_args_group_1{num_cols_per_core_group_1};
std::map<string, string> compute_defines;
const auto compute_kernel_file = "ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/prod/kernels/compute/prod_nc.cpp";

const auto compute_kernel_file = "ttnn/cpp/ttnn/operations/eltwise/prod/device/kernels/compute/prod_nc.cpp";
const auto compute_kernel_1_id = CreateComputeKernel(
program, compute_kernel_file, {core_group_1, num_cols_per_core_group_1, compute_args_group_1}, compute_defines);

Expand Down
Loading

0 comments on commit de5eee5

Please sign in to comment.