From 9713dcb08f577218112825532eb00bde6662bad3 Mon Sep 17 00:00:00 2001 From: VirdhatchaniKN Date: Tue, 9 Apr 2024 09:42:39 +0000 Subject: [PATCH] #7201: Add support for unary ne --- .../apis/kernel_apis/compute/compute.rst | 1 + .../kernel_apis/compute/unary_ne_tile.rst | 5 +++ docs/source/ttnn/ttnn/dependencies/tt_lib.rst | 2 + .../python_api_testing/sweep_tests/op_map.py | 4 ++ .../pytests/tt_dnn/test_eltwise_unary.py | 33 +++++++++++++++ .../sweep_tests/pytorch_ops.py | 6 +++ .../sweep_tests/tt_lib_ops.py | 18 ++++++++ .../eltwise_unary/eltwise_unary_op.cpp | 1 + .../eltwise_unary/eltwise_unary_op.hpp | 7 +++- .../csrc/tt_lib_bindings_tensor_xary_ops.cpp | 7 ++++ .../metal/llk_api/llk_math_unary_sfpu_api.h | 1 + .../llk_sfpu/ckernel_sfpu_unary_comp.h | 42 +++++++++++++++++++ .../llk_math_eltwise_unary_sfpu_unary_comp.h | 29 +++++++++++++ .../grayskull/metal/llk_api/llk_sfpu_types.h | 1 + .../metal/llk_api/llk_math_unary_sfpu_api.h | 1 + .../llk_sfpu/ckernel_sfpu_unary_comp.h | 42 +++++++++++++++++++ .../llk_math_eltwise_unary_sfpu_unary_comp.h | 29 +++++++++++++ .../metal/llk_api/llk_sfpu_types.h | 1 + tt_metal/include/compute_kernel_api.h | 25 +++++++++++ 19 files changed, 253 insertions(+), 2 deletions(-) create mode 100644 docs/source/tt-metalium/tt_metal/apis/kernel_apis/compute/unary_ne_tile.rst create mode 100644 tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_unary_comp.h create mode 100644 tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h create mode 100644 tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_unary_comp.h create mode 100644 tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h diff --git a/docs/source/tt-metalium/tt_metal/apis/kernel_apis/compute/compute.rst b/docs/source/tt-metalium/tt_metal/apis/kernel_apis/compute/compute.rst index c4cc6d0f9f8..138c1fac8bc 100644 --- a/docs/source/tt-metalium/tt_metal/apis/kernel_apis/compute/compute.rst +++ b/docs/source/tt-metalium/tt_metal/apis/kernel_apis/compute/compute.rst @@ -64,6 +64,7 @@ Compute APIs gtz_tile gez_tile nez_tile + unary_ne_tile cb_wait_front cb_pop_front diff --git a/docs/source/tt-metalium/tt_metal/apis/kernel_apis/compute/unary_ne_tile.rst b/docs/source/tt-metalium/tt_metal/apis/kernel_apis/compute/unary_ne_tile.rst new file mode 100644 index 00000000000..2b121d91ebd --- /dev/null +++ b/docs/source/tt-metalium/tt_metal/apis/kernel_apis/compute/unary_ne_tile.rst @@ -0,0 +1,5 @@ +unary_ne_tile +-------------- + +.. doxygenfunction:: unary_ne_tile_init() +.. doxygenfunction:: unary_ne_tile(uint32_t idst, uint32_t param0) diff --git a/docs/source/ttnn/ttnn/dependencies/tt_lib.rst b/docs/source/ttnn/ttnn/dependencies/tt_lib.rst index 9554b35ed37..f8eac1fd97e 100644 --- a/docs/source/ttnn/ttnn/dependencies/tt_lib.rst +++ b/docs/source/ttnn/ttnn/dependencies/tt_lib.rst @@ -501,6 +501,8 @@ Tensor relational operations .. autofunction:: tt_lib.tensor.ne +.. autofunction:: tt_lib.tensor.unary_ne + Tensor ternary operations ========================= .. autofunction:: tt_lib.tensor.where diff --git a/tests/tt_eager/python_api_testing/sweep_tests/op_map.py b/tests/tt_eager/python_api_testing/sweep_tests/op_map.py index 56a9f14e7da..cfea60b904e 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/op_map.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/op_map.py @@ -582,6 +582,10 @@ "tt_lib_op": tt_lib_ops.eltwise_heaviside, "pytorch_op": pytorch_ops.heaviside, }, + "eltwise-unary_ne": { + "tt_lib_op": tt_lib_ops.eltwise_unary_ne, + "pytorch_op": pytorch_ops.unary_ne, + }, "eltwise-erf": { "tt_lib_op": tt_lib_ops.eltwise_erf, "pytorch_op": pytorch_ops.erf, diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_unary.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_unary.py index 5dd18fbf1b6..dbeef733dbe 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_unary.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_unary.py @@ -615,6 +615,39 @@ def test_run_eltwise_heaviside( test_args, ) + @pytest.mark.parametrize("unary_comp", ["unary_ne"]) + @pytest.mark.parametrize("scalar", [0.5, 1.0, -1.0, 0.0]) + def test_run_eltwise_unary_comp( + self, + unary_comp, + input_shapes, + scalar, + device, + function_level_defaults, + input_mem_config, + output_mem_config, + ): + datagen_func = [ + generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16) + ] + test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0] + test_args.update({"scalar": scalar}) + test_args.update( + { + "input_mem_config": [input_mem_config], + "output_mem_config": output_mem_config, + } + ) + comparison_func = comparison_funcs.comp_equal + run_single_pytorch_test( + f"eltwise-{unary_comp}", + input_shapes, + datagen_func, + comparison_func, + device, + test_args, + ) + @pytest.mark.parametrize("unary_kind", ["add_unary", "sub_unary", "mul_unary", "div_unary"]) @pytest.mark.parametrize("scalar", [-2.0, 1.0, 2.0, 8.0]) def test_run_eltwise_binop_to_unary_ops( diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py b/tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py index b024cdba75d..d95ee98a201 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py @@ -548,6 +548,12 @@ def heaviside(x, *args, **kwargs): return result +def unary_ne(x, *args, **kwargs): + value = kwargs.pop("scalar") + result = torch.ne(x, value) + return result + + def erf(x, *args, **kwargs): return torch.erf(x) diff --git a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py index 71a0d2027af..6f05a4cc812 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py @@ -1082,6 +1082,24 @@ def eltwise_heaviside( return tt2torch_tensor(t1) +@setup_host_and_device +def eltwise_unary_ne( + x, + *args, + scalar, + device, + dtype, + layout, + input_mem_config, + output_mem_config, + **kwargs, +): + t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0]) + t1 = ttl.tensor.unary_ne(t0, scalar, output_mem_config=output_mem_config) + + return tt2torch_tensor(t1) + + @setup_host_and_device def repeat_interleave( x, diff --git a/tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.cpp b/tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.cpp index 2449384cd66..28890ebb4ef 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.cpp +++ b/tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.cpp @@ -131,6 +131,7 @@ std::pair get_op_init_and_func_parameterized(UnaryOpType op_type case UnaryOpType::ADD_UNARY_SFPU: op_init_and_name = {"binop_with_scalar_tile_init();", fmt::format("add_unary_tile({}, {}u);", idst, Converter::to_hex(param0))}; break; case UnaryOpType::MUL_UNARY_SFPU: op_init_and_name = {"binop_with_scalar_tile_init();", fmt::format("mul_unary_tile({}, {}u);", idst, Converter::to_hex(param0))}; break; case UnaryOpType::DIV_UNARY_SFPU: op_init_and_name = {"binop_with_scalar_tile_init();", fmt::format("div_unary_tile({}, {}u);", idst, Converter::to_hex(1.0f/param0))}; break; + case UnaryOpType::UNARY_NE: op_init_and_name = {"unary_ne_tile_init();", fmt::format("unary_ne_tile({}, {}u);", idst, Converter::to_hex(param0))}; break; default: TT_ASSERT( false && "unexpected parameterized type"); }; diff --git a/tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.hpp b/tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.hpp index b0fd4689ece..f1678814ed2 100644 --- a/tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.hpp +++ b/tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.hpp @@ -74,7 +74,8 @@ enum class UnaryOpType { SUB_UNARY_SFPU = 56, MUL_UNARY_SFPU = 57, DIV_UNARY_SFPU = 58, - IDENTITY_UINT32 = 59 + IDENTITY_UINT32 = 59, + UNARY_NE = 60 }; template @@ -100,7 +101,8 @@ bool is_parametrized_type(T val) { case UnaryOpType::ADD_UNARY_SFPU: case UnaryOpType::SUB_UNARY_SFPU: case UnaryOpType::MUL_UNARY_SFPU: - case UnaryOpType::DIV_UNARY_SFPU: return true; + case UnaryOpType::DIV_UNARY_SFPU: + case UnaryOpType::UNARY_NE: return true; default: return false; } return false; @@ -308,6 +310,7 @@ constexpr auto leaky_relu = make_eltwise_unary_with_param{}; constexpr auto heaviside = make_eltwise_unary_with_param{}; +constexpr auto unary_ne = make_eltwise_unary_with_param{}; constexpr auto rsub = make_eltwise_unary_with_param{}; constexpr auto silu = make_eltwise_unary{}; constexpr auto identity = make_eltwise_unary{}; diff --git a/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp b/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp index b508bbe8286..e4a840c0268 100644 --- a/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp +++ b/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp @@ -149,6 +149,13 @@ namespace tt::tt_metal::detail { HEAVISIDE(x) = 0 if x < 0 , 1 if x > 0 , else value.)doc", R"doc("value", "float", "")doc" + ); + detail::bind_unary_op_with_param( + m_tensor, "unary_ne", unary_ne, + py::arg("value"), + R"doc(Perform an eltwise-unary not-equal (``{0} != {1}``) on input tensor.)doc", + R"doc("value", "float", "")doc" + ); detail::bind_unary_op_with_param( m_tensor, "rdiv", rdiv, diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_math_unary_sfpu_api.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_math_unary_sfpu_api.h index 8162c89d5e7..7df111b4314 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_math_unary_sfpu_api.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_math_unary_sfpu_api.h @@ -17,6 +17,7 @@ #include "llk_math_eltwise_unary_sfpu_silu.h" #include "llk_math_eltwise_unary_sfpu_topk.h" #include "llk_math_eltwise_unary_sfpu_trigonometry.h" +#include "llk_math_eltwise_unary_sfpu_unary_comp.h" namespace ckernel { diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_unary_comp.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_unary_comp.h new file mode 100644 index 00000000000..e38119654f8 --- /dev/null +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_unary_comp.h @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "ckernel.h" +#include "ckernel_defs.h" +#include "noc_nonblocking_api.h" +#include "ckernel_sfpu_converter.h" + +using namespace sfpi; + +namespace ckernel { +namespace sfpu { + +template +inline void calculate_unary_ne(uint value) +{ + // SFPU microcode + Converter c_value; + c_value.u = value; + vFloat s = c_value.f; + + #pragma GCC unroll 0 + for (int d = 0; d < ITERATIONS; d++) { + vFloat v = dst_reg[0]; + v_if (v == s) { + v = 0.0f; + }v_else { + v = 1.0f; + } + v_endif; + + dst_reg[0] = v; + + dst_reg++; + } +} + +} // namespace sfpu +} // namespace ckernel diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h new file mode 100644 index 00000000000..b135abd15ac --- /dev/null +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "ckernel_sfpu_unary_comp.h" + +namespace ckernel { + +// New LLK SFPU APIs + +//Unary Not equal +template +inline void llk_math_eltwise_unary_sfpu_unary_ne_init() { + llk_math_eltwise_unary_sfpu_init(); +} + +template +inline void llk_math_eltwise_unary_sfpu_unary_ne(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_1_param + (ckernel::sfpu::calculate_unary_ne, + ckernel::sfpu::calculate_unary_ne, + dst_index, vector_mode, param0); +} + +} diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu_types.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu_types.h index bf23a084b6d..a1b5c061392 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu_types.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu_types.h @@ -60,5 +60,6 @@ enum SfpuType { silu, mask, negative, + unary_ne, unused, }; diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_math_unary_sfpu_api.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_math_unary_sfpu_api.h index 23972fb69f9..a01df935a2e 100644 --- a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_math_unary_sfpu_api.h +++ b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_math_unary_sfpu_api.h @@ -21,4 +21,5 @@ #include "llk_math_eltwise_unary_sfpu_square.h" #include "llk_math_eltwise_unary_sfpu_tanh.h" #include "llk_math_eltwise_unary_sfpu_topk.h" +#include "llk_math_eltwise_unary_sfpu_unary_comp.h" #include "llk_math_eltwise_unary_sfpu_trigonometry.h" diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_unary_comp.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_unary_comp.h new file mode 100644 index 00000000000..c9c379d9cee --- /dev/null +++ b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_unary_comp.h @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "ckernel.h" +#include "ckernel_defs.h" +#include "noc_nonblocking_api.h" +#include "ckernel_sfpu_converter.h" + +using namespace sfpi; + +namespace ckernel { +namespace sfpu { + +template +inline void calculate_unary_ne(uint value) +{ + // SFPU microcode + Converter c_value; + c_value.u = value; + vFloat s = c_value.f; + + #pragma GCC unroll 0 + for (int d = 0; d < ITERATIONS; d++) { + vFloat v = dst_reg[0]; + v_if (v == s) { + v = 0.0f; + }v_else { + v = 1.0f; + } + v_endif; + + dst_reg[0] = v; + + dst_reg++; + } +} + +} // namespace sfpu +} // namespace ckernel diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h new file mode 100644 index 00000000000..03591ff227a --- /dev/null +++ b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "ckernel_sfpu_unary_comp.h" + +namespace ckernel { + +// New LLK SFPU APIs + +//Unary Not equal +template +inline void llk_math_eltwise_unary_sfpu_unary_ne_init() { + llk_math_eltwise_unary_sfpu_init(); +} + +template +inline void llk_math_eltwise_unary_sfpu_unary_ne(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_1_param + (ckernel::sfpu::calculate_unary_ne, + ckernel::sfpu::calculate_unary_ne, + dst_index, vector_mode, param0); +} + +} diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu_types.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu_types.h index e2105c90b67..78be34e6260 100644 --- a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu_types.h +++ b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu_types.h @@ -70,5 +70,6 @@ enum SfpuType { topk_local_sort, topk_merge, topk_rebuild, + unary_ne, unused, }; diff --git a/tt_metal/include/compute_kernel_api.h b/tt_metal/include/compute_kernel_api.h index b1ea83c175c..580b45f86ea 100644 --- a/tt_metal/include/compute_kernel_api.h +++ b/tt_metal/include/compute_kernel_api.h @@ -533,6 +533,31 @@ ALWI void heaviside_tile_init() { MATH(( llk_math_eltwise_unary_sfpu_heaviside_init() )); } +//unary ne : if x !=value --> 1.0, else 0.0 +/** + * Performs element-wise computation of: result = 1 if x!=value , where x is each element of a tile + * in DST register at index tile_index. The value is provided as const param0 The DST register buffer must be in + * acquired state via *acquire_dst* call. This call is blocking and is only + * available on the compute engine. + * + * Return value: None + * + * | Argument | Description | Type | Valid Range | Required | + * |-----------------|----------------------------------------------------------------------------|----------|-------------------------------------------------------|----------| + * | idst | The index of the tile in DST register buffer to perform the computation on | uint32_t | Must be less than the size of the DST register buffer | True | + * | param0 | The value to be compared with the input tensor | uint32_t | | True | + */ +ALWI void unary_ne_tile(uint32_t idst,uint32_t param0) { + MATH(( llk_math_eltwise_unary_sfpu_unary_ne(idst,param0) )); +} + +/** + * Please refer to documentation for any_init. + */ +ALWI void unary_ne_tile_init() { + MATH(( llk_math_eltwise_unary_sfpu_unary_ne_init() )); +} + //expm1 : (exp(x) - 1) /** * Performs element-wise computation of exp(x) - 1, v where x is each element of a tile