From 354c04767231553693e7d78985e34cb321bf1e18 Mon Sep 17 00:00:00 2001
From: Bharane AB
Date: Sat, 27 Jul 2024 14:42:33 +0000
Subject: [PATCH] #10778: Update argmax op with ttnn support

---
 docs/source/ttnn/ttnn/api.rst                 |   1 +
 docs/source/ttnn/ttnn/dependencies/tt_lib.rst |   2 -
 docs/source/ttnn/ttnn/ttnn/logical_not_.rst   |   6 +
 .../grayskull/test_argmax_padding.py          |   2 +-
 tests/ttnn/profiling/ops_for_profiling.py     |  18 +--
 .../op_library/composite/composite_ops.cpp    | 127 ------------------
 .../op_library/composite/composite_ops.hpp    |   6 -
 .../tt_lib_bindings_tensor_composite_ops.cpp  |  24 ----
 8 files changed, 17 insertions(+), 169 deletions(-)
 create mode 100644 docs/source/ttnn/ttnn/ttnn/logical_not_.rst

diff --git a/docs/source/ttnn/ttnn/api.rst b/docs/source/ttnn/ttnn/api.rst
index a2a0b38c9dd5..282439ba2d98 100644
--- a/docs/source/ttnn/ttnn/api.rst
+++ b/docs/source/ttnn/ttnn/api.rst
@@ -75,6 +75,7 @@ Pointwise Unary

    ttnn/abs
    ttnn/acos
+   ttnn/logical_not_
    ttnn/acosh
    ttnn/asin
    ttnn/asinh
diff --git a/docs/source/ttnn/ttnn/dependencies/tt_lib.rst b/docs/source/ttnn/ttnn/dependencies/tt_lib.rst
index d13bba05a175..82d8e92c54d7 100644
--- a/docs/source/ttnn/ttnn/dependencies/tt_lib.rst
+++ b/docs/source/ttnn/ttnn/dependencies/tt_lib.rst
@@ -529,8 +529,6 @@ Other Operations

 .. autofunction:: tt_lib.tensor.repeat

-.. autofunction:: tt_lib.tensor.argmax
-
 Loss Functions
 ==============

diff --git a/docs/source/ttnn/ttnn/ttnn/logical_not_.rst b/docs/source/ttnn/ttnn/ttnn/logical_not_.rst
new file mode 100644
index 000000000000..b9078c4f7294
--- /dev/null
+++ b/docs/source/ttnn/ttnn/ttnn/logical_not_.rst
@@ -0,0 +1,6 @@
+.. _ttnn.logical_not_:
+
+ttnn.logical_not_
+###################
+
+.. autofunction:: ttnn.logical_not_
diff --git a/tests/tt_eager/python_api_testing/non_working_unit_tests/grayskull/test_argmax_padding.py b/tests/tt_eager/python_api_testing/non_working_unit_tests/grayskull/test_argmax_padding.py
index fc52f8357071..1059257803e9 100644
--- a/tests/tt_eager/python_api_testing/non_working_unit_tests/grayskull/test_argmax_padding.py
+++ b/tests/tt_eager/python_api_testing/non_working_unit_tests/grayskull/test_argmax_padding.py
@@ -31,7 +31,7 @@ def test_argmax(self, input_shapes, dim, all, device):
         .to(tt_lib.tensor.Layout.TILE)
         .to(device)
     )
-    tt_output_tensor_on_device = tt_lib.tensor.argmax(input_tensor, dim=dim, all=all)
+    tt_output_tensor_on_device = ttnn.experimental.argmax(input_tensor, dim=dim, all=all)
     tt_out_tensor = tt_output_tensor_on_device.cpu().to(tt_lib.tensor.Layout.ROW_MAJOR).to_torch()
     if all:
         golden_tensor = torch.argmax(input_data)
diff --git a/tests/ttnn/profiling/ops_for_profiling.py b/tests/ttnn/profiling/ops_for_profiling.py
index 8101448cf346..aa9d454dfd2b 100644
--- a/tests/ttnn/profiling/ops_for_profiling.py
+++ b/tests/ttnn/profiling/ops_for_profiling.py
@@ -1516,23 +1516,23 @@ def pow_float(x):


 def argmax_1(x):
-    tt_lib.tensor.argmax(x, dim=-1)
+    ttnn.argmax(x, dim=-1)


 def argmax_2(x):
-    tt_lib.tensor.argmax(x, dim=-2)
+    ttnn.argmax(x, dim=-2)


 def argmax_3(x):
-    tt_lib.tensor.argmax(x, dim=-3)
+    ttnn.argmax(x, dim=-3)


 def argmax_4(x):
-    tt_lib.tensor.argmax(x, dim=-4)
+    ttnn.argmax(x, dim=-4)


 def argmax_all(x):
-    tt_lib.tensor.argmax(x, dim=-1, all=True)
+    ttnn.argmax(x, dim=-1, all=True)


 def argmin_1(x):
@@ -2264,22 +2264,22 @@ def clone(x):
     },
     {
         "op": argmax_1,
-        "name": "tt_lib.tensor.argmax_dim_3",
+        "name": "ttnn.argmax_dim_3",
         "num_repeats": 2,
     },
     {
         "op": argmax_2,
-        "name": "tt_lib.tensor.argmax_dim_2",
+        "name": "ttnn.argmax_dim_2",
         "num_repeats": 2,
     },
     {
         "op": argmax_3,
-        "name": "tt_lib.tensor.argmax_dim_1",
+        "name": "ttnn.argmax_dim_1",
         "num_repeats": 2,
     },
     {
         "op": argmax_all,
-        "name": "tt_lib.tensor.argmax_all",
+        "name": "ttnn.argmax_all",
         "num_repeats": 2,
     },
     {
diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp
index 6fd2570a8911..5c2b32b0c83b 100644
--- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp
+++ b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp
@@ -733,14 +733,6 @@ Tensor sfpu_eps(const Shape shape, Layout layout, Device* device, const MemoryCo
     return operation::decorate_as_composite(__func__, _sfpu_eps)(shape, layout, device, output_mem_config);
 }

-Tensor triu(
-    const Tensor& input_a,
-    int32_t dim /* = -1 */,
-    const MemoryConfig& output_mem_config /* = operation::DEFAULT_OUTPUT_MEMORY_CONFIG */) {
-    return operation::decorate_as_composite(__func__, _triu)(input_a, dim, output_mem_config);
-}
-
-
 Tensor create_mask(const Tensor& input_a, const MemoryConfig& output_mem_config) {
     auto& padded_shape = input_a.get_legacy_shape();
     auto& unpadded_shape = padded_shape.without_padding();
@@ -751,125 +743,6 @@ Tensor create_mask(const Tensor& input_a, const MemoryConfig& output_mem_config)
     masked_input = ttnn::where(masked_input, input_a, t_inf, output_mem_config);
     return masked_input;
 }
-// Argmax returns the index of maximum element in the tensor
-Tensor _argmax(const Tensor& input_t, int64_t _dim, bool all, const MemoryConfig& output_mem_config) {
-    std::vector<Tensor> output_tensors = {Tensor(operation::get_workers_for_op_output({input_t}))};
-    operation::launch_with_autoformat(
-        [_dim, all, output_mem_config](
-            const std::vector<Tensor>& input_tensors,
-            const std::vector<std::optional<const Tensor>>& optional_input_tensors,
-            const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
-            const auto& input = input_tensors.at(0);
-            auto& input_shape = input.get_legacy_shape();
-            TT_FATAL(input_shape.rank() == 4, "supported for rank-4 tensors at this time");
-
-            Tensor input_a = create_mask(input, output_mem_config);
-
-            uint32_t dim = input_shape.get_normalized_index(_dim);
-            int size = input_a.volume();
-
-            if (!all) {
-                if ((dim == (input_shape.rank() - 1)) || (dim == (input_shape.rank() - 2))) {
-                    bool is_width = (dim == (input_shape.rank() - 1));
-                    Tensor max_val = max(input_a, dim, output_mem_config);
-                    Tensor max_tensor = zeros_like(input_a, output_mem_config);
-                    Tensor tindex = tt::numpy::index_width(
-                        input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_mem_config);
-                    if (is_width) {
-                        max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_mem_config);
-                    } else {
-                        tindex = tt::numpy::index_height(
-                            input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_mem_config);
-                        max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_mem_config);
-                    }
-                    tindex = tindex.to(input_a.device());
-                    max_val.deallocate();
-                    Tensor cmp_results = ttnn::eq(input_a, max_tensor, std::nullopt, output_mem_config);
-                    max_tensor.deallocate();
-                    Tensor max_indices = ttnn::multiply(cmp_results, tindex, std::nullopt, output_mem_config);
-                    cmp_results.deallocate();
-                    Tensor result = ttnn::where(ttnn::eqz(max_indices), size, max_indices, output_mem_config);
-                    max_indices.deallocate();
-                    result = min(result, dim, output_mem_config);
-                    Tensor res_index = zeros_like(result, output_mem_config);
-                    result = ttnn::where(ttnn::eq(result, size), res_index, result, output_mem_config);
-                    std::vector<int64_t> permute_dims = {3, 0, 1, 2};
-                    if (is_width) {
-                        res_index = ttnn::add(res_index, result, std::nullopt, output_mem_config);
-                    } else {
-                        res_index = ttnn::add(res_index, result, std::nullopt, output_mem_config);
-                        permute_dims[0] = 2;
-                        permute_dims[3] = 3;
-                    }
-                    result.deallocate();
-                    Tensor transpose_res = ttnn::permute(res_index, permute_dims, output_mem_config);
-                    return {transpose_res};
-                } else if ((dim == (input_shape.rank() - 3)) || (dim == (input_shape.rank() - 4))) {
-                    bool is_channel = (dim == (input_shape.rank() - 3));
-                    Tensor max_val = max(input_a, dim, output_mem_config);
-                    int repeat = input.get_shape()[dim];
-                    std::vector<Tensor> combined_tensors;
-                    for (int cid = 0; cid < repeat; cid++) combined_tensors.emplace_back(max_val);
-                    max_val.deallocate();
-                    Tensor concat_out = concat(combined_tensors, dim, output_mem_config);
-                    // Needed till `max` stops autoformatting output
-                    concat_out = ttnn::reshape(concat_out, input_a.get_shape());
-                    Tensor cmp_results = ttnn::eq(input_a, concat_out, std::nullopt, output_mem_config);
-                    concat_out.deallocate();
-                    Tensor tindex = tt::numpy::index_channel(
-                        input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_mem_config);
-                    if (!is_channel) {
-                        tindex = tt::numpy::index_batch(
-                            input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_mem_config);
-                    }
-                    tindex = tindex.to(input_a.device());
-                    Tensor max_indices = ttnn::multiply(cmp_results, tindex, std::nullopt, output_mem_config);
-                    cmp_results.deallocate();
-                    Tensor midx = full_like(max_indices, size);
-                    Tensor result = ttnn::where(ttnn::eqz(max_indices), midx, max_indices, output_mem_config);
-                    max_indices.deallocate();
-                    result = min(result, dim, output_mem_config);
-                    Tensor res_index = zeros_like(result, output_mem_config);
-                    result = ttnn::where(ttnn::eq(result, full_like(result, size)), res_index, result, output_mem_config);
-                    res_index.deallocate();
-                    if (is_channel) {
-                        std::vector<int64_t> permute_dims = {1, 0, 2, 3};
-                        Tensor transpose_res = ttnn::permute(result, permute_dims, output_mem_config);
-                        return {transpose_res};
-                    } else {
-                        return {result};
-                    }
-                }
-            }
-            // TODO: Fix the index generation code. With the fix the code will work for argmax that return entire
-            // maximum value index
-            Tensor tindex = tt::numpy::index_all(
-                input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_mem_config);
-            Tensor max_val = global_max(input_a, output_mem_config);
-            Tensor max_tensor = zeros_like(input_a, output_mem_config);
-            max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_mem_config);
-            max_val.deallocate();
-            Tensor cmp_results = ttnn::eq(input_a, max_tensor, std::nullopt, output_mem_config);
-            max_tensor.deallocate();
-            Tensor max_indices = ttnn::multiply(cmp_results, tindex, std::nullopt, output_mem_config);
-            cmp_results.deallocate();
-            Tensor result = ttnn::where(ttnn::eqz(max_indices), size, max_indices, output_mem_config);
-            max_indices.deallocate();
-            result = global_min(result, output_mem_config);
-            return {result};
-        },
-        {input_t},
-        output_tensors);
-    return output_tensors.at(0);
-}
-
-Tensor argmax(
-    const Tensor& input_a,
-    int64_t dim,
-    bool all,
-    const MemoryConfig& output_mem_config /* = operation::DEFAULT_OUTPUT_MEMORY_CONFIG */) {
-    return operation::decorate_as_composite(__func__, _argmax)(input_a, dim, all, output_mem_config);
-}

 }  // namespace tt_metal

diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp
index 42bf420446af..c0a342e4fceb 100644
--- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp
+++ b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp
@@ -276,12 +276,6 @@ Tensor logical_ori(
 // on-device tensor creation with shape and filled with value
 Tensor sfpu_eps(const Shape shape, Layout layout, Device* device, const MemoryConfig& output_mem_config);

-Tensor argmax(
-    const Tensor& input_a,
-    int64_t dim = 0,
-    bool all = false,
-    const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
-
 }  // namespace tt_metal

 }  // namespace tt
diff --git a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp
index db5a39c16846..ed1740d4b515 100644
--- a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp
+++ b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp
@@ -118,30 +118,6 @@ void TensorModuleCompositeOPs(py::module& m_tensor) {
         R"doc(Perform an eltwise logical OR (``{0} || {1}``) on input tensor and immediate value.)doc",
         R"doc("Scalar", "float", "")doc");

-    m_tensor.def(
-        "argmax",
-        &argmax,
-        py::arg("input").noconvert(),
-        py::arg("dim"),
-        py::arg("all") = false,
-        py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
-        R"doc(
-        Returns the indices of the maximum value of elements in the ``input`` tensor
-        If ``all`` is set to ``true`` irrespective of given dimension it will return the indices of maximum value of all elements in given ``input``
-
-        Input tensor must have BFLOAT16 data type.
-
-        Output tensor will have BFLOAT16 data type.
-
-        .. csv-table::
-            :header: "Argument", "Description", "Data type", "Valid range", "Required"
-
-            "input", "Tensor argmax is applied to", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes"
-            "dim", "Dimension to perform argmax", "int", "", "Yes"
-            "all", "Consider all dimension (ignores ``dim`` param)", "bool", "default to false", "No"
-            "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No"
-        )doc");
-
     m_tensor.def(
         "lerp",
         py::overload_cast(&lerp),