From 9bdbbe5463e2902d63f95e1dc5138d9f34ed0ec5 Mon Sep 17 00:00:00 2001 From: Reem Tawfik Date: Mon, 3 Jun 2024 21:45:01 +0000 Subject: [PATCH] #9036: GS & BH --> Combine llk param files using variable args --- .../llk_math_eltwise_unary_sfpu_0_param.h | 51 ---------------- .../llk_math_eltwise_unary_sfpu_2_param.h | 56 ------------------ .../llk_math_eltwise_unary_sfpu_3_param.h | 57 ------------------ .../llk_math_eltwise_unary_sfpu_5_param.h | 59 ------------------- .../llk_math_eltwise_unary_sfpu_abs.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_add1.h | 12 ++-- ...ath_eltwise_unary_sfpu_binop_with_scalar.h | 8 +-- ...th_eltwise_unary_sfpu_cast_fp32_to_fp16a.h | 9 ++- .../llk_math_eltwise_unary_sfpu_clamp.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_comp.h | 36 +++++------ .../llk_math_eltwise_unary_sfpu_dropout.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_elu.h | 9 ++- .../llk_math_eltwise_unary_sfpu_erf_erfc.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_erfinv.h | 9 ++- .../llk_math_eltwise_unary_sfpu_exp.h | 16 +++-- .../llk_math_eltwise_unary_sfpu_exp2.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_expm1.h | 9 ++- .../llk_math_eltwise_unary_sfpu_gelu.h | 16 ++--- .../llk_math_eltwise_unary_sfpu_hardtanh.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_heaviside.h | 9 ++- .../llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h | 9 ++- .../llk_math_eltwise_unary_sfpu_identity.h | 16 +++-- .../llk_math_eltwise_unary_sfpu_init.h | 4 +- .../llk_math_eltwise_unary_sfpu_isinf_isnan.h | 39 ++++++------ .../llk_math_eltwise_unary_sfpu_log.h | 15 ++--- ...math_eltwise_unary_sfpu_logical_not_noti.h | 9 ++- .../llk_math_eltwise_unary_sfpu_mask.h | 10 ++-- .../llk_math_eltwise_unary_sfpu_max.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_min.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_negative.h | 9 ++- ...h => llk_math_eltwise_unary_sfpu_params.h} | 21 +++---- .../llk_math_eltwise_unary_sfpu_power.h | 9 ++- .../llk_math_eltwise_unary_sfpu_recip.h | 11 ++-- .../llk_math_eltwise_unary_sfpu_relu.h | 24 ++++---- .../llk_math_eltwise_unary_sfpu_reverseops.h | 34 +++++------ .../llk_math_eltwise_unary_sfpu_rsqrt.h | 18 +++--- .../llk_math_eltwise_unary_sfpu_sigmoid.h | 9 ++- ...llk_math_eltwise_unary_sfpu_sigmoid_appx.h | 9 ++- .../llk_math_eltwise_unary_sfpu_sign.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_signbit.h | 9 ++- .../llk_math_eltwise_unary_sfpu_silu.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_sqrt.h | 11 ++-- .../llk_math_eltwise_unary_sfpu_square.h | 9 ++- .../llk_math_eltwise_unary_sfpu_tanh.h | 12 ++-- ..._math_eltwise_unary_sfpu_tanh_derivative.h | 9 ++- .../llk_math_eltwise_unary_sfpu_tiled_prod.h | 9 ++- .../llk_math_eltwise_unary_sfpu_topk.h | 40 ++++--------- ...llk_math_eltwise_unary_sfpu_trigonometry.h | 48 ++++++++------- .../llk_math_eltwise_unary_sfpu_unary_comp.h | 22 ++++--- .../llk_math_eltwise_unary_sfpu_0_param.h | 52 ---------------- .../llk_math_eltwise_unary_sfpu_add1.h | 10 ++-- ...ath_eltwise_unary_sfpu_binop_with_scalar.h | 8 +-- .../llk_math_eltwise_unary_sfpu_comp.h | 56 ++++++++++-------- .../llk_math_eltwise_unary_sfpu_elu.h | 15 +++-- .../llk_math_eltwise_unary_sfpu_erf_erfc.h | 22 +++---- .../llk_math_eltwise_unary_sfpu_erfinv.h | 13 ++-- .../llk_math_eltwise_unary_sfpu_exp.h | 16 ++--- .../llk_math_eltwise_unary_sfpu_exp2.h | 10 ++-- .../llk_math_eltwise_unary_sfpu_expm1.h | 10 ++-- .../llk_math_eltwise_unary_sfpu_gelu.h | 22 ++++--- .../llk_math_eltwise_unary_sfpu_heaviside.h | 11 ++-- .../llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h | 13 ++-- .../llk_math_eltwise_unary_sfpu_identity.h | 15 ++--- .../llk_math_eltwise_unary_sfpu_isinf_isnan.h | 48 +++++++-------- ...math_eltwise_unary_sfpu_logical_not_noti.h | 13 ++-- .../llk_math_eltwise_unary_sfpu_mask.h | 15 ++--- .../llk_math_eltwise_unary_sfpu_min.h | 10 ++-- .../llk_math_eltwise_unary_sfpu_negative.h | 14 ++--- ...h => llk_math_eltwise_unary_sfpu_params.h} | 16 +++-- .../llk_math_eltwise_unary_sfpu_power.h | 11 ++-- .../llk_math_eltwise_unary_sfpu_recip.h | 14 ++--- .../llk_math_eltwise_unary_sfpu_relu.h | 38 ++++++------ .../llk_math_eltwise_unary_sfpu_reverseops.h | 13 ++-- .../llk_math_eltwise_unary_sfpu_rsqrt.h | 15 +++-- .../llk_math_eltwise_unary_sfpu_sigmoid.h | 10 ++-- ...llk_math_eltwise_unary_sfpu_sigmoid_appx.h | 12 ++-- .../llk_math_eltwise_unary_sfpu_sign.h | 10 ++-- .../llk_math_eltwise_unary_sfpu_signbit.h | 10 ++-- .../llk_math_eltwise_unary_sfpu_silu.h | 10 ++-- .../llk_math_eltwise_unary_sfpu_sqrt.h | 15 +++-- .../llk_math_eltwise_unary_sfpu_tiled_prod.h | 10 ++-- .../llk_math_eltwise_unary_sfpu_topk.h | 26 ++++---- ...llk_math_eltwise_unary_sfpu_trigonometry.h | 53 ++++++++--------- .../llk_math_eltwise_unary_sfpu_unary_comp.h | 31 +++++----- 84 files changed, 613 insertions(+), 945 deletions(-) delete mode 100644 tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_0_param.h delete mode 100644 tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_2_param.h delete mode 100644 tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_3_param.h delete mode 100644 tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_5_param.h rename tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/{llk_math_eltwise_unary_sfpu_1_param.h => llk_math_eltwise_unary_sfpu_params.h} (84%) delete mode 100644 tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_0_param.h rename tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/{llk_math_eltwise_unary_sfpu_1_param.h => llk_math_eltwise_unary_sfpu_params.h} (86%) diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_0_param.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_0_param.h deleted file mode 100644 index c32b783386f..00000000000 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_0_param.h +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#pragma once -#include "llk_math_eltwise_unary_sfpu.h" -#include "llk_sfpu_types.h" - -template -inline void llk_math_eltwise_unary_sfpu_0_param( - void (*first_func)(), void (*func)(), uint dst_index, int vector_mode = (int)VectorMode::RC) { - math::set_dst_write_addr(dst_index); - - TTI_STALLWAIT(p_stall::STALL_SFPU, p_stall::MATH); - if (vector_mode == (int)VectorMode::R) { - // Do a row vector, Face0 + Face1 -- first iteration (first row) - const int ITERATIONS = 1; -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - first_func(); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - // Skip the next 2 faces - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } else if (vector_mode == (int)VectorMode::C) { - // Do a column vector, Face0 + Face2 -- All iterations for full face -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - func(); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else if (vector_mode == (int)VectorMode::RC) { - // Do all four faces, and iterate through all 4 blocks of 4 rows each -#pragma GCC unroll 0 - for (int face = 0; face < 4; face++) { - func(); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else { - func(); - } - math::clear_dst_reg_addr(); -} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_2_param.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_2_param.h deleted file mode 100644 index 26bee1c110b..00000000000 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_2_param.h +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#pragma once -#include "llk_math_eltwise_unary_sfpu.h" -#include "llk_sfpu_types.h" - -template -inline void llk_math_eltwise_unary_sfpu_2_param( - void (*first_func)(uint, uint), - void (*func)(uint, uint), - uint dst_index, - int vector_mode = (int)VectorMode::RC, - uint param0 = 0, - uint param1 = 0) { - math::set_dst_write_addr(dst_index); - - TTI_STALLWAIT(p_stall::STALL_SFPU, p_stall::MATH); - if (vector_mode == (int)VectorMode::R) { - // Do a row vector, Face0 + Face1 -- first iteration (first row) - const int ITERATIONS = 1; -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - first_func(param0, param1); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - // Skip the next 2 faces - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } else if (vector_mode == (int)VectorMode::C) { - // Do a column vector, Face0 + Face2 -- All iterations for full face -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - func(param0, param1); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else if (vector_mode == (int)VectorMode::RC) { - // Do all four faces, and iterate through all 4 blocks of 4 rows each -#pragma GCC unroll 0 - for (int face = 0; face < 4; face++) { - func(param0, param1); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else { - func(param0, param1); - } - math::clear_dst_reg_addr(); -} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_3_param.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_3_param.h deleted file mode 100644 index 7833d4653c1..00000000000 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_3_param.h +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#pragma once -#include "llk_math_eltwise_unary_sfpu.h" -#include "llk_sfpu_types.h" - -template -inline void llk_math_eltwise_unary_sfpu_3_param( - void (*first_func)(uint, uint, uint), - void (*func)(uint, uint, uint), - uint dst_index, - int vector_mode = (int)VectorMode::RC, - uint param0 = 0, - uint param1 = 0, - uint param2 = 0) { - math::set_dst_write_addr(dst_index); - - TTI_STALLWAIT(p_stall::STALL_SFPU, p_stall::MATH); - if (vector_mode == (int)VectorMode::R) { - // Do a row vector, Face0 + Face1 -- first iteration (first row) - const int ITERATIONS = 1; -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - first_func(param0, param1, param2); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - // Skip the next 2 faces - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } else if (vector_mode == (int)VectorMode::C) { - // Do a column vector, Face0 + Face2 -- All iterations for full face -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - func(param0, param1, param2); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else if (vector_mode == (int)VectorMode::RC) { - // Do all four faces, and iterate through all 4 blocks of 4 rows each -#pragma GCC unroll 0 - for (int face = 0; face < 4; face++) { - func(param0, param1, param2); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else { - func(param0, param1, param2); - } - math::clear_dst_reg_addr(); -} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_5_param.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_5_param.h deleted file mode 100644 index 3fb306fb94a..00000000000 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_5_param.h +++ /dev/null @@ -1,59 +0,0 @@ -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#pragma once -#include "llk_math_eltwise_unary_sfpu.h" -#include "llk_sfpu_types.h" - -template -inline void llk_math_eltwise_unary_sfpu_5_param( - void (*first_func)(uint, uint, uint, uint, uint), - void (*func)(uint, uint, uint, uint, uint), - uint dst_index, - int vector_mode = (int)VectorMode::RC, - uint param0 = 0, - uint param1 = 0, - uint param2 = 0, - uint param3 = 0, - uint param4 = 0) { - math::set_dst_write_addr(dst_index); - - TTI_STALLWAIT(p_stall::STALL_SFPU, p_stall::MATH); - if (vector_mode == (int)VectorMode::R) { - // Do a row vector, Face0 + Face1 -- first iteration (first row) - const int ITERATIONS = 1; -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - first_func(param0, param1, param2, param3, param4); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - // Skip the next 2 faces - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } else if (vector_mode == (int)VectorMode::C) { - // Do a column vector, Face0 + Face2 -- All iterations for full face -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - func(param0, param1, param2, param3, param4); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else if (vector_mode == (int)VectorMode::RC) { - // Do all four faces, and iterate through all 4 blocks of 4 rows each -#pragma GCC unroll 0 - for (int face = 0; face < 4; face++) { - func(param0, param1, param2, param3, param4); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else { - func(param0, param1, param2, param3, param4); - } - math::clear_dst_reg_addr(); -} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_abs.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_abs.h index 9255a56de2c..6e483a8c5b0 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_abs.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_abs.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_abs.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_abs.h" namespace ckernel { @@ -19,8 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_abs_init() { template inline void llk_math_eltwise_unary_sfpu_abs(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_abs, ckernel::sfpu::calculate_abs, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_abs, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_add1.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_add1.h index d4ff03cfaa1..c969db09fa3 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_add1.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_add1.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_add1.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_add1.h" namespace ckernel { @@ -19,8 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_add1_init() { template inline void llk_math_eltwise_unary_sfpu_add1(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_add1, ckernel::sfpu::calculate_add1, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_add1, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_binop_with_scalar.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_binop_with_scalar.h index 79adbb30f5d..4174bd43c67 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_binop_with_scalar.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_binop_with_scalar.h @@ -5,7 +5,7 @@ #pragma once #include "ckernel_sfpu_binop_with_unary.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "llk_math_eltwise_unary_sfpu_init.h" namespace ckernel { @@ -13,10 +13,8 @@ namespace ckernel { // New LLK SFPU APIs template -inline void llk_math_eltwise_unary_sfpu_binop_with_scalar( - uint dst_index, uint32_t param1, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_binop_with_scalar, +inline void llk_math_eltwise_unary_sfpu_binop_with_scalar(uint dst_index, uint32_t param1, int vector_mode = VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_binop_with_scalar, dst_index, vector_mode, diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_cast_fp32_to_fp16a.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_cast_fp32_to_fp16a.h index 36b8d2989f4..4b64070106b 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_cast_fp32_to_fp16a.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_cast_fp32_to_fp16a.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_cast_fp32_to_fp16a.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_cast_fp32_to_fp16a.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_cast_fp32_to_fp16a_init() { template inline void llk_math_eltwise_unary_sfpu_cast_fp32_to_fp16a(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_cast_fp32_to_fp16a, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_cast_fp32_to_fp16a, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_clamp.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_clamp.h index 9cebd3de7ac..8b65ab47395 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_clamp.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_clamp.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_clamp.h" -#include "llk_math_eltwise_unary_sfpu_3_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_clamp.h" namespace ckernel { @@ -18,10 +18,8 @@ inline void llk_math_eltwise_unary_sfpu_clamp_init() { } template -inline void llk_math_eltwise_unary_sfpu_clamp( - uint dst_index, uint param0, uint param1, uint param2, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_3_param( - ckernel::sfpu::calculate_clamp, +inline void llk_math_eltwise_unary_sfpu_clamp(uint dst_index, uint param0, uint param1, uint param2, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_clamp, dst_index, vector_mode, @@ -30,4 +28,4 @@ inline void llk_math_eltwise_unary_sfpu_clamp( param2); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_comp.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_comp.h index 81dfda5fe29..8d3009915de 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_comp.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_comp.h @@ -4,19 +4,18 @@ #pragma once -#include "ckernel_sfpu_comp.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_comp.h" namespace ckernel { // New LLK SFPU APIs -// EQZ +//EQZ template inline void llk_math_eltwise_unary_sfpu_eqz(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_comp, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_comp, dst_index, vector_mode, @@ -28,11 +27,10 @@ inline void llk_math_eltwise_unary_sfpu_eqz_init() { llk_math_eltwise_unary_sfpu_init(); } -// NEZ +//NEZ template inline void llk_math_eltwise_unary_sfpu_nez(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_comp, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_comp, dst_index, vector_mode, @@ -44,11 +42,10 @@ inline void llk_math_eltwise_unary_sfpu_nez_init() { llk_math_eltwise_unary_sfpu_init(); } -// LTZ +//LTZ template inline void llk_math_eltwise_unary_sfpu_ltz(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_comp, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_comp, dst_index, vector_mode, @@ -60,11 +57,10 @@ inline void llk_math_eltwise_unary_sfpu_ltz_init() { llk_math_eltwise_unary_sfpu_init(); } -// GTZ +//GTZ template inline void llk_math_eltwise_unary_sfpu_gtz(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_comp, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_comp, dst_index, vector_mode, @@ -76,11 +72,10 @@ inline void llk_math_eltwise_unary_sfpu_gtz_init() { llk_math_eltwise_unary_sfpu_init(); } -// LEZ +//LEZ template inline void llk_math_eltwise_unary_sfpu_lez(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_comp, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_comp, dst_index, vector_mode, @@ -92,11 +87,10 @@ inline void llk_math_eltwise_unary_sfpu_lez_init() { llk_math_eltwise_unary_sfpu_init(); } -// GEZ +//GEZ template inline void llk_math_eltwise_unary_sfpu_gez(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_comp, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_comp, dst_index, vector_mode, @@ -108,4 +102,4 @@ inline void llk_math_eltwise_unary_sfpu_gez_init() { llk_math_eltwise_unary_sfpu_init(); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_dropout.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_dropout.h index 4cc09ce7d23..4dfddab02ea 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_dropout.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_dropout.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_dropout.h" -#include "llk_math_eltwise_unary_sfpu_2_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_dropout.h" namespace ckernel { @@ -18,10 +18,8 @@ inline void llk_math_eltwise_unary_sfpu_dropout_init(uint seed = 0) { } template -inline void llk_math_eltwise_unary_sfpu_dropout( - uint dst_index, int vector_mode = (int)VectorMode::RC, int integer_dropout, int scale_factor) { - llk_math_eltwise_unary_sfpu_2_param( - ckernel::sfpu::calculate_dropout, +inline void llk_math_eltwise_unary_sfpu_dropout(uint dst_index, int vector_mode = (int)VectorMode::RC, int integer_dropout, int scale_factor) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_dropout, dst_index, vector_mode, @@ -29,4 +27,4 @@ inline void llk_math_eltwise_unary_sfpu_dropout( scale_factor); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_elu.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_elu.h index 8f357318dd8..017ace33960 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_elu.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_elu.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_elu.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_elu.h" namespace ckernel { @@ -19,12 +19,11 @@ inline void llk_math_eltwise_unary_sfpu_elu_init() { template inline void llk_math_eltwise_unary_sfpu_elu(uint dst_index, uint param0) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_elu, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_elu, dst_index, (int)VectorMode::RC, param0); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erf_erfc.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erf_erfc.h index 798b8d2677e..8fa11356c7c 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erf_erfc.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erf_erfc.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_erf_erfc.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_erf_erfc.h" namespace ckernel { @@ -24,8 +24,7 @@ inline void llk_math_eltwise_unary_sfpu_erfc_init() { template inline void llk_math_eltwise_unary_sfpu_erf(uint dst_index, int param0 = 0) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_erf_erfc, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_erf_erfc, dst_index, (int)VectorMode::RC); @@ -33,11 +32,10 @@ inline void llk_math_eltwise_unary_sfpu_erf(uint dst_index, int param0 = 0) { template inline void llk_math_eltwise_unary_sfpu_erfc(uint dst_index, int param0 = 0) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_erf_erfc, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_erf_erfc, dst_index, (int)VectorMode::RC); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erfinv.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erfinv.h index 18dfdaca649..9e9d9192b07 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erfinv.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erfinv.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_erfinv.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_erfinv.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_erfinv_init() { template inline void llk_math_eltwise_unary_sfpu_erfinv_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_erfinv, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_erfinv, dst_index, (int)VectorMode::RC); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp.h index 613dfa31f3f..85186d68102 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp.h @@ -4,20 +4,18 @@ #pragma once -#include "ckernel_sfpu_exp.h" -#include "llk_math_eltwise_unary_sfpu_2_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_exp.h" namespace ckernel { // New LLK SFPU APIs -template -inline void llk_math_eltwise_unary_sfpu_exponential( - uint dst_index, int vector_mode = (int)VectorMode::RC, int param0 = ITERATIONS, int param1 = 0) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_2_param( - ckernel::sfpu::calculate_exponential, +template +inline void llk_math_eltwise_unary_sfpu_exponential(uint dst_index, int vector_mode = (int)VectorMode::RC, int param0 = ITERATIONS, int param1 = 0) { + + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_exponential, dst_index, vector_mode, @@ -30,4 +28,4 @@ inline void llk_math_eltwise_unary_sfpu_exponential_init() { llk_math_eltwise_unary_sfpu_init(sfpu::exp_init); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp2.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp2.h index a60aef1b309..a70add82aa7 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp2.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp2.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_exp2.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_exp2.h" namespace ckernel { @@ -19,8 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_exp2_init() { template inline void llk_math_eltwise_unary_sfpu_exp2(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_exp2, ckernel::sfpu::calculate_exp2, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_exp2, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_expm1.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_expm1.h index b11e6df35dd..fff928475af 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_expm1.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_expm1.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_expm1.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_expm1.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_expm1_init() { template inline void llk_math_eltwise_unary_sfpu_expm1(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_expm1, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_expm1, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_gelu.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_gelu.h index dfdb5f2ba2e..710418f49c6 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_gelu.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_gelu.h @@ -4,19 +4,17 @@ #pragma once -#include "ckernel_sfpu_gelu.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_gelu.h" namespace ckernel { // New LLK SFPU APIs template -inline void llk_math_eltwise_unary_sfpu_gelu(uint dst_index, int vector_mode = (int)VectorMode::RC, int param0 = 0) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_gelu, +inline void llk_math_eltwise_unary_sfpu_gelu(uint dst_index, int vector_mode = (int)VectorMode::RC, int param0=0) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_gelu, dst_index, vector_mode); @@ -29,9 +27,7 @@ inline void llk_math_eltwise_unary_sfpu_gelu_init() { template inline void llk_math_eltwise_unary_sfpu_gelu_derivative(uint dst_index, int vector_mode = (int)VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_gelu_derivative, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_gelu_derivative, dst_index, vector_mode); @@ -42,4 +38,4 @@ inline void llk_math_eltwise_unary_sfpu_gelu_derivative_init() { llk_math_eltwise_unary_sfpu_init(sfpu::gelu_derivative_init); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_hardtanh.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_hardtanh.h index 19b948b80af..bac1091c1a4 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_hardtanh.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_hardtanh.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_hardtanh.h" -#include "llk_math_eltwise_unary_sfpu_3_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_hardtanh.h" namespace ckernel { @@ -18,10 +18,8 @@ inline void llk_math_eltwise_unary_sfpu_hardtanh_init() { } template -inline void llk_math_eltwise_unary_sfpu_hardtanh( - uint dst_index, uint param0, uint param1, uint param2, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_3_param( - ckernel::sfpu::calculate_hardtanh, +inline void llk_math_eltwise_unary_sfpu_hardtanh(uint dst_index, uint param0, uint param1, uint param2, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_hardtanh, dst_index, vector_mode, @@ -30,4 +28,4 @@ inline void llk_math_eltwise_unary_sfpu_hardtanh( param2); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_heaviside.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_heaviside.h index 990cb42ebb6..14bd2d537be 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_heaviside.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_heaviside.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_heaviside.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_heaviside.h" namespace ckernel { @@ -19,12 +19,11 @@ inline void llk_math_eltwise_unary_sfpu_heaviside_init() { template inline void llk_math_eltwise_unary_sfpu_heaviside(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_heaviside, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_heaviside, dst_index, vector_mode, param0); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h index dfee05efd27..9a93496c669 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_i0.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_i0.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_i0_init() { template inline void llk_math_eltwise_unary_sfpu_i0_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_i0, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_i0, dst_index, (int)VectorMode::RC); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_identity.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_identity.h index 91b5cfa54d9..73796336972 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_identity.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_identity.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_identity.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_identity.h" namespace ckernel { @@ -14,18 +14,16 @@ namespace ckernel { template inline void llk_math_eltwise_unary_sfpu_identity(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_identity, - ckernel::sfpu::calculate_identity, + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_identity, dst_index, vector_mode); } template inline void llk_math_eltwise_unary_sfpu_identity_uint32(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_identity_uint, - ckernel::sfpu::calculate_identity_uint, + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_identity_uint, dst_index, vector_mode); } @@ -35,4 +33,4 @@ inline void llk_math_eltwise_unary_sfpu_identity_init() { llk_math_eltwise_unary_sfpu_init(); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_init.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_init.h index b86fb4e51fa..4565c88949b 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_init.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_init.h @@ -4,8 +4,8 @@ #pragma once -#include "llk_math_eltwise_unary_sfpu.h" #include "llk_sfpu_types.h" +#include "llk_math_eltwise_unary_sfpu.h" namespace ckernel { @@ -29,4 +29,4 @@ inline void llk_math_eltwise_unary_sfpu_init_1_param(void (*func)(uint), uint pa math::reset_counters(p_setrwc::SET_ABD_F); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_isinf_isnan.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_isinf_isnan.h index 13291b49a12..9b2ceac7db4 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_isinf_isnan.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_isinf_isnan.h @@ -4,15 +4,16 @@ #pragma once -#include "ckernel_sfpu_isinf_isnan.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_isinf_isnan.h" namespace ckernel { // New LLK SFPU APIs -// isinf + +//isinf template inline void llk_math_eltwise_unary_sfpu_isinf_init() { llk_math_eltwise_unary_sfpu_init(); @@ -20,14 +21,14 @@ inline void llk_math_eltwise_unary_sfpu_isinf_init() { template inline void llk_math_eltwise_unary_sfpu_isinf(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_isinf_isnan, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_isinf_isnan, dst_index, (int)VectorMode::RC); + } -// isposinf +//isposinf template inline void llk_math_eltwise_unary_sfpu_isposinf_init() { llk_math_eltwise_unary_sfpu_init(); @@ -35,29 +36,31 @@ inline void llk_math_eltwise_unary_sfpu_isposinf_init() { template inline void llk_math_eltwise_unary_sfpu_isposinf(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_isinf_isnan, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_isinf_isnan, dst_index, (int)VectorMode::RC); + } -// isneginf + +//isneginf template inline void llk_math_eltwise_unary_sfpu_isneginf_init() { llk_math_eltwise_unary_sfpu_init(); } + template inline void llk_math_eltwise_unary_sfpu_isneginf(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_isinf_isnan, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_isinf_isnan, dst_index, (int)VectorMode::RC); + } -// isnan +//isnan template inline void llk_math_eltwise_unary_sfpu_isnan_init() { llk_math_eltwise_unary_sfpu_init(); @@ -65,14 +68,14 @@ inline void llk_math_eltwise_unary_sfpu_isnan_init() { template inline void llk_math_eltwise_unary_sfpu_isnan(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_isinf_isnan, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_isinf_isnan, dst_index, (int)VectorMode::RC); + } -// isfinite +//isfinite template inline void llk_math_eltwise_unary_sfpu_isfinite_init() { llk_math_eltwise_unary_sfpu_init(); @@ -80,11 +83,11 @@ inline void llk_math_eltwise_unary_sfpu_isfinite_init() { template inline void llk_math_eltwise_unary_sfpu_isfinite(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_isinf_isnan, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_isinf_isnan, dst_index, (int)VectorMode::RC); + } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_log.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_log.h index 7cc67ec7915..ba454d3b38b 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_log.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_log.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_log.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_log.h" namespace ckernel { @@ -19,8 +19,7 @@ inline void llk_math_eltwise_unary_sfpu_log_init() { template inline void llk_math_eltwise_unary_sfpu_log(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_log, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_log, dst_index, vector_mode, @@ -33,14 +32,12 @@ inline void llk_math_eltwise_unary_sfpu_log_with_base_init() { } template -inline void llk_math_eltwise_unary_sfpu_log_with_base( - uint dst_index, uint base_scale, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_log, +inline void llk_math_eltwise_unary_sfpu_log_with_base(uint dst_index, uint base_scale, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_log, dst_index, vector_mode, base_scale); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_logical_not_noti.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_logical_not_noti.h index aeb4b6154b5..b3e4828ee2d 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_logical_not_noti.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_logical_not_noti.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_logical_not_noti.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_logical_not_noti.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_logical_not_unary_init() { template inline void llk_math_eltwise_unary_sfpu_logical_not_unary_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_logical_not_unary, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_logical_not_unary, dst_index, (int)VectorMode::RC); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_mask.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_mask.h index d70d16ef93b..b51a33b4230 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_mask.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_mask.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_mask.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_mask.h" namespace ckernel { @@ -19,12 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_mask_init() { template inline void llk_math_eltwise_unary_sfpu_mask(uint dst_index, int vector_mode = (int)VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_mask, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_mask, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_max.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_max.h index fba36cba350..e330f10edf6 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_max.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_max.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_max.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_max.h" namespace ckernel { @@ -19,8 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_max_init() { template inline void llk_math_eltwise_unary_sfpu_max(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_max, ckernel::sfpu::calculate_max, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_max, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_min.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_min.h index e9ed5b31483..d0daf95183f 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_min.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_min.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_min.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_min.h" namespace ckernel { @@ -19,8 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_min_init() { template inline void llk_math_eltwise_unary_sfpu_min(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_min, ckernel::sfpu::calculate_min, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_min, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_negative.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_negative.h index 82c64c61314..1e830ded444 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_negative.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_negative.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_negative.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_negative.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_negative_init() { template inline void llk_math_eltwise_unary_sfpu_negative(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_negative, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_negative, dst_index, vector_mode); } -} // namespace ckernel +} // namespace ckernel diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_1_param.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_params.h similarity index 84% rename from tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_1_param.h rename to tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_params.h index e8ee9d5e29c..574ff588c69 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_1_param.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_params.h @@ -3,17 +3,18 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once -#include "llk_math_eltwise_unary_sfpu.h" #include "llk_sfpu_types.h" +#include "llk_math_eltwise_unary_sfpu.h" -template -inline void llk_math_eltwise_unary_sfpu_1_param( - void (*first_func)(uint), - void (*func)(uint), +template +inline void llk_math_eltwise_unary_sfpu_params( + F&& sfpu_func, uint dst_index, int vector_mode = (int)VectorMode::RC, - uint param0 = 0) { + ARGS&& ... args) { + math::set_dst_write_addr(dst_index); + math::set_addr_mod_base(); TTI_STALLWAIT(p_stall::STALL_SFPU, p_stall::MATH); if (vector_mode == (int)VectorMode::R) { @@ -21,7 +22,7 @@ inline void llk_math_eltwise_unary_sfpu_1_param( const int ITERATIONS = 1; #pragma GCC unroll 0 for (int face = 0; face < 2; face++) { - first_func(param0); + sfpu_func(static_cast(args)...); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); } @@ -34,7 +35,7 @@ inline void llk_math_eltwise_unary_sfpu_1_param( // Do a column vector, Face0 + Face2 -- All iterations for full face #pragma GCC unroll 0 for (int face = 0; face < 2; face++) { - func(param0); + sfpu_func(static_cast(args)...); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); @@ -44,12 +45,12 @@ inline void llk_math_eltwise_unary_sfpu_1_param( // Do all four faces, and iterate through all 4 blocks of 4 rows each #pragma GCC unroll 0 for (int face = 0; face < 4; face++) { - func(param0); + sfpu_func(static_cast(args)...); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); } } else { - func(param0); + sfpu_func(static_cast(args)...); } math::clear_dst_reg_addr(); } diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_power.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_power.h index 822caa9e132..b23838be088 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_power.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_power.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_power_iterative.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_power_iterative.h" namespace ckernel { @@ -19,12 +19,11 @@ inline void llk_math_eltwise_unary_sfpu_power_init() { template inline void llk_math_eltwise_unary_sfpu_power(uint dst_index, int pow = 0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_power_iterative, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_power_iterative, dst_index, vector_mode, pow); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_recip.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_recip.h index 8558b829a89..376fc200436 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_recip.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_recip.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_recip.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_recip.h" namespace ckernel { @@ -14,12 +14,11 @@ namespace ckernel { template inline void llk_math_eltwise_unary_sfpu_reciprocal(uint dst_index, int vector_mode = (int)VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_reciprocal, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_reciprocal, dst_index, vector_mode); + } template @@ -27,4 +26,4 @@ inline void llk_math_eltwise_unary_sfpu_reciprocal_init() { llk_math_eltwise_unary_sfpu_init(sfpu::recip_init); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h index cc67f51c982..6e4589e0836 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h @@ -4,14 +4,15 @@ #pragma once -#include "ckernel_sfpu_relu.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_relu.h" namespace ckernel { // New LLK SFPU APIs + template inline void llk_math_eltwise_unary_sfpu_relu_init() { llk_math_eltwise_unary_sfpu_init(); @@ -31,10 +32,10 @@ inline void llk_math_eltwise_unary_sfpu_relu_min_init() { llk_math_eltwise_unary_sfpu_init(); } + template inline void llk_math_eltwise_unary_sfpu_lrelu(uint dst_index, uint param0 = 0) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_lrelu, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_lrelu, dst_index, (int)VectorMode::RC, @@ -43,8 +44,7 @@ inline void llk_math_eltwise_unary_sfpu_lrelu(uint dst_index, uint param0 = 0) { template inline void llk_math_eltwise_unary_sfpu_relu_max(uint dst_index, uint param0 = 0) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::relu_max, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::relu_max, dst_index, (int)VectorMode::RC, @@ -53,8 +53,7 @@ inline void llk_math_eltwise_unary_sfpu_relu_max(uint dst_index, uint param0 = 0 template inline void llk_math_eltwise_unary_sfpu_relu_min(uint dst_index, uint param0 = 0) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::relu_min, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::relu_min, dst_index, (int)VectorMode::RC, @@ -63,8 +62,11 @@ inline void llk_math_eltwise_unary_sfpu_relu_min(uint dst_index, uint param0 = 0 template inline void llk_math_eltwise_unary_sfpu_relu(uint dst_index) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::relu_min, ckernel::sfpu::relu_min, dst_index, (int)VectorMode::RC, 0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::relu_min, + dst_index, + (int)VectorMode::RC, + 0); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_reverseops.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_reverseops.h index baaaef6d9d1..be61a1b25d0 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_reverseops.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_reverseops.h @@ -4,27 +4,27 @@ #pragma once -#include "ckernel_reverseops.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_reverseops.h" + namespace ckernel { -/************** rsub ************/ + /************** rsub ************/ -template -inline void llk_math_eltwise_unary_sfpu_rsub_init() { - llk_math_eltwise_unary_sfpu_init(sfpu::rsub_init); -} + template + inline void llk_math_eltwise_unary_sfpu_rsub_init() { + llk_math_eltwise_unary_sfpu_init(sfpu::rsub_init); + } -template -inline void llk_math_eltwise_unary_sfpu_rsub(uint dst_index, uint param0 = 0) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_rsub, - ckernel::sfpu::calculate_rsub, - dst_index, - (int)VectorMode::RC, - param0); -} + template + inline void llk_math_eltwise_unary_sfpu_rsub(uint dst_index, uint param0 = 0) { + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_rsub, + dst_index, + (int)VectorMode::RC, + param0); + } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_rsqrt.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_rsqrt.h index dcb189a25fd..a8d7777ad69 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_rsqrt.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_rsqrt.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_rsqrt.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_rsqrt.h" namespace ckernel { @@ -24,17 +24,15 @@ inline void llk_math_eltwise_unary_sfpu_rsqrt(uint dst_index, int vector_mode = // The algorithm uses Newton's method based on no.of iteration better approximation can be calculated // if (APPROXIMATE) { - // llk_math_eltwise_unary_sfpu_0_param - // (ckernel::sfpu::calculate_rsqrt, + // llk_math_eltwise_unary_sfpu_params( // ckernel::sfpu::calculate_rsqrt, // dst_index, vector_mode); // } else { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_rsqrt, - ckernel::sfpu::calculate_rsqrt, - dst_index, - vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_rsqrt, + dst_index, + vector_mode); // } } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid.h index 45d918d66b3..c8fb6e6ee64 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_sigmoid.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_sigmoid.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_sigmoid_init() { template inline void llk_math_eltwise_unary_sfpu_sigmoid(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sigmoid, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sigmoid, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid_appx.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid_appx.h index b9b6a3bd3c0..8d122f420d3 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid_appx.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid_appx.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_sigmoid_appx.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_sigmoid_appx.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_sigmoid_appx_init() { template inline void llk_math_eltwise_unary_sfpu_sigmoid_appx(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sigmoid_appx, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sigmoid_appx, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sign.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sign.h index 897d07b3095..05a43368cf2 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sign.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sign.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_sign.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_sign.h" namespace ckernel { @@ -19,8 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_sign_init() { template inline void llk_math_eltwise_unary_sfpu_sign(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sign, ckernel::sfpu::calculate_sign, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sign, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_signbit.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_signbit.h index c8ad1b3284a..5e7cc49327b 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_signbit.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_signbit.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_signbit.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_signbit.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_signbit_init() { template inline void llk_math_eltwise_unary_sfpu_signbit(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_signbit, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_signbit, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_silu.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_silu.h index fbffc62d1b5..0bfdfb4b0cc 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_silu.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_silu.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_silu.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_silu.h" namespace ckernel { @@ -19,8 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_silu_init() { template inline void llk_math_eltwise_unary_sfpu_silu(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_silu, ckernel::sfpu::calculate_silu, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_silu, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sqrt.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sqrt.h index 4fa9c910296..64166543b72 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sqrt.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sqrt.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_sqrt.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_sqrt.h" namespace ckernel { @@ -14,12 +14,11 @@ namespace ckernel { template inline void llk_math_eltwise_unary_sfpu_sqrt(uint dst_index, int vector_mode = (int)VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sqrt, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sqrt, dst_index, vector_mode); + } template @@ -27,4 +26,4 @@ inline void llk_math_eltwise_unary_sfpu_sqrt_init() { llk_math_eltwise_unary_sfpu_init(sfpu::sqrt_init); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_square.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_square.h index 475d5dfaac0..90cadb977a0 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_square.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_square.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_square.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_square.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_square_init() { template inline void llk_math_eltwise_unary_sfpu_square(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_square, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_square, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tanh.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tanh.h index 505557dd11f..af6c0573953 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tanh.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tanh.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_tanh.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_tanh.h" namespace ckernel { @@ -19,8 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_tanh_init() { template inline void llk_math_eltwise_unary_sfpu_tanh(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_tanh, ckernel::sfpu::calculate_tanh, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_tanh, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tanh_derivative.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tanh_derivative.h index b505f18166a..b793a0626b8 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tanh_derivative.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tanh_derivative.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_tanh_derivative.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_tanh_derivative.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_tanh_derivative_init() { template inline void llk_math_eltwise_unary_sfpu_tanh_derivative(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_tanh_derivative, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_tanh_derivative, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tiled_prod.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tiled_prod.h index 3d852b1774b..1867b1b7920 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tiled_prod.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tiled_prod.h @@ -4,9 +4,9 @@ #pragma once -#include "ckernel_sfpu_tiled_prod.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_tiled_prod.h" namespace ckernel { @@ -19,11 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_tiled_prod_init() { template inline void llk_math_eltwise_unary_sfpu_tiled_prod(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_tiled_prod, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_tiled_prod, dst_index, vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_topk.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_topk.h index bf7f1155278..e3a67a49e65 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_topk.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_topk.h @@ -4,10 +4,10 @@ #pragma once -#include "ckernel_sfpu_topk.h" -#include "llk_math_eltwise_unary_sfpu_2_param.h" -#include "llk_math_eltwise_unary_sfpu_5_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_topk.h" namespace ckernel { @@ -19,16 +19,9 @@ inline void llk_math_eltwise_unary_sfpu_topk_init() { } template -inline void llk_math_eltwise_unary_sfpu_topk_local_sort( - uint dst_index, - int idir, - int i_end_phase, - int i_start_phase, - int i_end_step, - int i_start_step, - int vector_mode = (int)VectorMode::RC_custom) { - llk_math_eltwise_unary_sfpu_5_param( - ckernel::sfpu::calculate_bitonic_topk_phases_steps, +inline void llk_math_eltwise_unary_sfpu_topk_local_sort(uint dst_index, int idir, int i_end_phase, int i_start_phase, + int i_end_step, int i_start_step, int vector_mode = (int)VectorMode::RC_custom) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_bitonic_topk_phases_steps, dst_index, vector_mode, @@ -40,10 +33,8 @@ inline void llk_math_eltwise_unary_sfpu_topk_local_sort( } template -inline void llk_math_eltwise_unary_sfpu_topk_merge( - uint dst_index, int m_iter, int k, int vector_mode = (int)VectorMode::RC_custom) { - llk_math_eltwise_unary_sfpu_2_param( - ckernel::sfpu::calculate_bitonic_topk_merge, +inline void llk_math_eltwise_unary_sfpu_topk_merge(uint dst_index, int m_iter, int k, int vector_mode = (int)VectorMode::RC_custom) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_bitonic_topk_merge, dst_index, vector_mode, @@ -52,16 +43,9 @@ inline void llk_math_eltwise_unary_sfpu_topk_merge( } template -inline void llk_math_eltwise_unary_sfpu_topk_rebuild( - uint dst_index, - bool idir, - int m_iter, - int k, - int logk, - int skip_second, - int vector_mode = (int)VectorMode::RC_custom) { - llk_math_eltwise_unary_sfpu_5_param( - ckernel::sfpu::calculate_bitonic_topk_rebuild, +inline void llk_math_eltwise_unary_sfpu_topk_rebuild(uint dst_index, bool idir, int m_iter, int k, int logk, + int skip_second, int vector_mode = (int)VectorMode::RC_custom) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_bitonic_topk_rebuild, dst_index, vector_mode, @@ -72,4 +56,4 @@ inline void llk_math_eltwise_unary_sfpu_topk_rebuild( skip_second); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_trigonometry.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_trigonometry.h index 19c5fc129fd..ac001d9a8e9 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_trigonometry.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_trigonometry.h @@ -4,15 +4,15 @@ #pragma once -#include "ckernel_sfpu_trigonometry.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_trigonometry.h" namespace ckernel { // New LLK SFPU APIs -// sine +//sine template inline void llk_math_eltwise_unary_sfpu_sine_init() { llk_math_eltwise_unary_sfpu_init(); @@ -20,14 +20,14 @@ inline void llk_math_eltwise_unary_sfpu_sine_init() { template inline void llk_math_eltwise_unary_sfpu_sine_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_trig, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_trig, dst_index, (int)VectorMode::RC); } -// cosine + +//cosine template inline void llk_math_eltwise_unary_sfpu_cosine_init() { llk_math_eltwise_unary_sfpu_init(); @@ -35,14 +35,14 @@ inline void llk_math_eltwise_unary_sfpu_cosine_init() { template inline void llk_math_eltwise_unary_sfpu_cosine_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_trig, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_trig, dst_index, (int)VectorMode::RC); } -// tangent + +//tangent template inline void llk_math_eltwise_unary_sfpu_tan_init() { llk_math_eltwise_unary_sfpu_init(); @@ -50,14 +50,14 @@ inline void llk_math_eltwise_unary_sfpu_tan_init() { template inline void llk_math_eltwise_unary_sfpu_tan_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_sfpu_trig, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_sfpu_trig, dst_index, (int)VectorMode::RC); + } -// asin +//asin template inline void llk_math_eltwise_unary_sfpu_asin_init() { llk_math_eltwise_unary_sfpu_init(); @@ -65,11 +65,13 @@ inline void llk_math_eltwise_unary_sfpu_asin_init() { template inline void llk_math_eltwise_unary_sfpu_asin(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_asin, ckernel::sfpu::calculate_asin, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_asin, + dst_index, + vector_mode); } -// acos +//acos template inline void llk_math_eltwise_unary_sfpu_acos_init() { llk_math_eltwise_unary_sfpu_init(); @@ -77,11 +79,13 @@ inline void llk_math_eltwise_unary_sfpu_acos_init() { template inline void llk_math_eltwise_unary_sfpu_acos(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_acos, ckernel::sfpu::calculate_acos, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_acos, + dst_index, + vector_mode); } -// atan +//atan template inline void llk_math_eltwise_unary_sfpu_atan_init() { llk_math_eltwise_unary_sfpu_init(sfpu::atan_init); @@ -89,8 +93,10 @@ inline void llk_math_eltwise_unary_sfpu_atan_init() { template inline void llk_math_eltwise_unary_sfpu_atan(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param( - ckernel::sfpu::calculate_atan, ckernel::sfpu::calculate_atan, dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_atan, + dst_index, + vector_mode); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h index 978b644bcf8..3b64e3fd35f 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h @@ -4,15 +4,15 @@ #pragma once -#include "ckernel_sfpu_unary_comp.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" #include "llk_math_eltwise_unary_sfpu_init.h" +#include "llk_math_eltwise_unary_sfpu_params.h" +#include "ckernel_sfpu_unary_comp.h" namespace ckernel { // New LLK SFPU APIs -// Unary Not equal +//Unary Not equal template inline void llk_math_eltwise_unary_sfpu_unary_ne_init() { llk_math_eltwise_unary_sfpu_init(); @@ -20,15 +20,14 @@ inline void llk_math_eltwise_unary_sfpu_unary_ne_init() { template inline void llk_math_eltwise_unary_sfpu_unary_ne(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_unary_ne, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_unary_ne, dst_index, vector_mode, param0); } -// Unary greater than +//Unary greater than template inline void llk_math_eltwise_unary_sfpu_unary_gt_init() { llk_math_eltwise_unary_sfpu_init(); @@ -36,15 +35,15 @@ inline void llk_math_eltwise_unary_sfpu_unary_gt_init() { template inline void llk_math_eltwise_unary_sfpu_unary_gt(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_unary_gt, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_unary_gt, dst_index, vector_mode, param0); } -// Unary lesser than + +//Unary lesser than template inline void llk_math_eltwise_unary_sfpu_unary_lt_init() { llk_math_eltwise_unary_sfpu_init(); @@ -52,11 +51,10 @@ inline void llk_math_eltwise_unary_sfpu_unary_lt_init() { template inline void llk_math_eltwise_unary_sfpu_unary_lt(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_unary_lt, + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_unary_lt, dst_index, vector_mode, param0); } -} // namespace ckernel +} diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_0_param.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_0_param.h deleted file mode 100644 index e6cbac2b519..00000000000 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_0_param.h +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#pragma once -#include "llk_math_eltwise_unary_sfpu_common_includes.h" - - -template -inline void llk_math_eltwise_unary_sfpu_0_param( - void (*first_func)(), - void (*func)(), - uint dst_index, - int vector_mode = VectorMode::RC) { - - math::set_dst_write_addr(dst_index); - - if (vector_mode == VectorMode::R) { - // Do a row vector, Face0 + Face1 -- first iteration - const int ITERATIONS = 1; -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - first_func(); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - // Skip the next 2 faces - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } else if (vector_mode == VectorMode::C) { - // Do a column vector, Face0 + Face2 -- full face -#pragma GCC unroll 0 - for (int face = 0; face < 2; face++) { - func(); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } else { -#pragma GCC unroll 0 - // Do all four faces, and iterate through all 4 blocks of 4 rows each - for (int face = 0; face < 4; face++) { - func(); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); - } - } - math::clear_dst_reg_addr(); -} diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_add1.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_add1.h index c48a817cdce..c882ab3f980 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_add1.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_add1.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_add1.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_add1_init() { template inline void llk_math_eltwise_unary_sfpu_add1(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_add1, - ckernel::sfpu::calculate_add1, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_add1, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_binop_with_scalar.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_binop_with_scalar.h index f5dd993ea35..1eb2837da24 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_binop_with_scalar.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_binop_with_scalar.h @@ -5,8 +5,7 @@ #pragma once #include "ckernel_sfpu_binop_with_unary.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" -#include "llk_math_eltwise_unary_sfpu_common_includes.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "llk_math_eltwise_unary_sfpu_init.h" namespace ckernel { @@ -14,9 +13,8 @@ namespace ckernel { // New LLK SFPU APIs template -inline void llk_math_eltwise_unary_sfpu_binop_with_scalar(uint dst_index, uint32_t param1, int vector_mode = VectorMode::RC ) { - llk_math_eltwise_unary_sfpu_1_param( - ckernel::sfpu::calculate_binop_with_scalar, +inline void llk_math_eltwise_unary_sfpu_binop_with_scalar(uint dst_index, uint32_t param1, int vector_mode = VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_binop_with_scalar, dst_index, vector_mode, diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_comp.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_comp.h index 32a52602b87..2a0bdc6d406 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_comp.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_comp.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_comp.h" namespace ckernel { @@ -15,10 +15,11 @@ namespace ckernel { //EQZ template inline void llk_math_eltwise_unary_sfpu_eqz(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_comp, - ckernel::sfpu::calculate_comp, - dst_index, vector_mode, 8); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_comp, + dst_index, + vector_mode, + 8); } template @@ -29,10 +30,11 @@ inline void llk_math_eltwise_unary_sfpu_eqz_init() { //NEZ template inline void llk_math_eltwise_unary_sfpu_nez(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_comp, - ckernel::sfpu::calculate_comp, - dst_index, vector_mode, 8); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_comp, + dst_index, + vector_mode, + 8); } template @@ -43,10 +45,11 @@ inline void llk_math_eltwise_unary_sfpu_nez_init() { //LTZ template inline void llk_math_eltwise_unary_sfpu_ltz(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_comp, - ckernel::sfpu::calculate_comp, - dst_index, vector_mode, 8); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_comp, + dst_index, + vector_mode, + 8); } template @@ -57,10 +60,11 @@ inline void llk_math_eltwise_unary_sfpu_ltz_init() { //GTZ template inline void llk_math_eltwise_unary_sfpu_gtz(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_comp, - ckernel::sfpu::calculate_comp, - dst_index, vector_mode, 8); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_comp, + dst_index, + vector_mode, + 8); } template @@ -71,10 +75,11 @@ inline void llk_math_eltwise_unary_sfpu_gtz_init() { //LEZ template inline void llk_math_eltwise_unary_sfpu_lez(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_comp, - ckernel::sfpu::calculate_comp, - dst_index, vector_mode, 8); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_comp, + dst_index, + vector_mode, + 8); } template @@ -85,10 +90,11 @@ inline void llk_math_eltwise_unary_sfpu_lez_init() { //GEZ template inline void llk_math_eltwise_unary_sfpu_gez(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_comp, - ckernel::sfpu::calculate_comp, - dst_index, vector_mode, 8); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_comp, + dst_index, + vector_mode, + 8); } template diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_elu.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_elu.h index 13efde6ac15..4bd16fabbf5 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_elu.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_elu.h @@ -4,10 +4,8 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_elu.h" namespace ckernel { @@ -20,11 +18,12 @@ inline void llk_math_eltwise_unary_sfpu_elu_init() { } template -inline void llk_math_eltwise_unary_sfpu_elu(uint dst_index, uint param0 = 0) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_elu, - ckernel::sfpu::calculate_elu, - dst_index, VectorMode::RC, param0); +inline void llk_math_eltwise_unary_sfpu_elu(uint dst_index, uint param0) { + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_elu, + dst_index, + (int)VectorMode::RC, + param0); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erf_erfc.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erf_erfc.h index 1be86344ae1..d2db3476d7e 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erf_erfc.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erf_erfc.h @@ -4,10 +4,8 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_erf_erfc.h" namespace ckernel { @@ -25,20 +23,18 @@ inline void llk_math_eltwise_unary_sfpu_erfc_init() { template inline void llk_math_eltwise_unary_sfpu_erf(uint dst_index, int param0 = 0, int vector_mode = VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_erf_erfc, - ckernel::sfpu::calculate_sfpu_erf_erfc, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_erf_erfc, + dst_index, + vector_mode); } template inline void llk_math_eltwise_unary_sfpu_erfc(uint dst_index, int param0 = 0, int vector_mode = VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_erf_erfc, - ckernel::sfpu::calculate_sfpu_erf_erfc, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_erf_erfc, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erfinv.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erfinv.h index f5f643e9f49..834c3c91cb5 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erfinv.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_erfinv.h @@ -4,17 +4,14 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_erfinv.h" namespace ckernel { // New LLK SFPU APIs -//isinf template inline void llk_math_eltwise_unary_sfpu_erfinv_init() { llk_math_eltwise_unary_sfpu_init(); @@ -22,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_erfinv_init() { template inline void llk_math_eltwise_unary_sfpu_erfinv_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_erfinv, - ckernel::sfpu::calculate_erfinv, - dst_index, VectorMode::RC); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_erfinv, + dst_index, + (int)VectorMode::RC); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp.h index 903df97f8bd..ea031d6a9cb 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp.h @@ -4,10 +4,8 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_exp.h" namespace ckernel { @@ -17,16 +15,14 @@ namespace ckernel { template inline void llk_math_eltwise_unary_sfpu_exponential(uint dst_index, int vector_mode = VectorMode::RC, int param0 = 0) { - constexpr bool zero_negative = false; - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_exponential, + constexpr bool zero_negative = false; + llk_math_eltwise_unary_sfpu_params( ckernel::sfpu::calculate_exponential, - dst_index, vector_mode, param0); + dst_index, + vector_mode, + param0); } - - template inline void llk_math_eltwise_unary_sfpu_exponential_init() { llk_math_eltwise_unary_sfpu_init(sfpu::exp_init); diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp2.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp2.h index 8a86d886462..35e61e7ba0c 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp2.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_exp2.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_exp2.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_exp2_init() { template inline void llk_math_eltwise_unary_sfpu_exp2(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_exp2, - ckernel::sfpu::calculate_exp2, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_exp2, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_expm1.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_expm1.h index 33b850a4b8d..197846e91a8 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_expm1.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_expm1.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_expm1.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_expm1_init() { template inline void llk_math_eltwise_unary_sfpu_expm1(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_expm1, - ckernel::sfpu::calculate_expm1, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_expm1, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_gelu.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_gelu.h index 8e0ef03821d..df4419b386b 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_gelu.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_gelu.h @@ -3,9 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once -#include "llk_math_eltwise_unary_sfpu_common_includes.h" + #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_gelu.h" namespace ckernel { @@ -14,11 +14,10 @@ namespace ckernel { template inline void llk_math_eltwise_unary_sfpu_gelu(uint dst_index, int vector_mode = VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_gelu, - ckernel::sfpu::calculate_gelu, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_gelu, + dst_index, + vector_mode); } template @@ -28,11 +27,10 @@ inline void llk_math_eltwise_unary_sfpu_gelu_init() { template inline void llk_math_eltwise_unary_sfpu_gelu_derivative(uint dst_index, int vector_mode = VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_gelu_derivative, - ckernel::sfpu::calculate_gelu_derivative, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_gelu_derivative, + dst_index, + vector_mode); } template diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_heaviside.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_heaviside.h index 3fa60ce9b14..03f48d4d5e2 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_heaviside.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_heaviside.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_heaviside.h" namespace ckernel { @@ -19,10 +19,11 @@ inline void llk_math_eltwise_unary_sfpu_heaviside_init() { template inline void llk_math_eltwise_unary_sfpu_heaviside(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_heaviside, - ckernel::sfpu::calculate_heaviside, - dst_index, vector_mode, param0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_heaviside, + dst_index, + vector_mode, + param0); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h index 972c8c50269..b90b847358c 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i0.h @@ -4,17 +4,14 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_i0.h" namespace ckernel { // New LLK SFPU APIs -//isinf template inline void llk_math_eltwise_unary_sfpu_i0_init() { llk_math_eltwise_unary_sfpu_init(); @@ -22,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_i0_init() { template inline void llk_math_eltwise_unary_sfpu_i0_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_i0, - ckernel::sfpu::calculate_i0, - dst_index, VectorMode::RC); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_i0, + dst_index, + (int)VectorMode::RC); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_identity.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_identity.h index f0f3b4e87d4..0684b950d1d 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_identity.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_identity.h @@ -4,9 +4,8 @@ #pragma once -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_identity.h" namespace ckernel { @@ -14,13 +13,11 @@ namespace ckernel { // New LLK SFPU APIs template -inline void llk_math_eltwise_unary_sfpu_identity(uint dst_index, int vector_mode = VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_identity, - ckernel::sfpu::calculate_identity, - dst_index, vector_mode); - +inline void llk_math_eltwise_unary_sfpu_identity(uint dst_index, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_identity, + dst_index, + vector_mode); } template diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_isinf_isnan.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_isinf_isnan.h index 4b038bc5402..83af4b36601 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_isinf_isnan.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_isinf_isnan.h @@ -4,10 +4,8 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_isinf_isnan.h" namespace ckernel { @@ -22,10 +20,10 @@ inline void llk_math_eltwise_unary_sfpu_isinf_init() { template inline void llk_math_eltwise_unary_sfpu_isinf(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_isinf_isnan, - ckernel::sfpu::calculate_sfpu_isinf_isnan, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_isinf_isnan, + dst_index, + vector_mode); } @@ -37,10 +35,10 @@ inline void llk_math_eltwise_unary_sfpu_isposinf_init() { template inline void llk_math_eltwise_unary_sfpu_isposinf(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_isinf_isnan, - ckernel::sfpu::calculate_sfpu_isinf_isnan, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_isinf_isnan, + dst_index, + vector_mode); } @@ -50,12 +48,14 @@ template inline void llk_math_eltwise_unary_sfpu_isneginf_init() { llk_math_eltwise_unary_sfpu_init(); } + + template inline void llk_math_eltwise_unary_sfpu_isneginf(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_isinf_isnan, - ckernel::sfpu::calculate_sfpu_isinf_isnan, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_isinf_isnan, + dst_index, + vector_mode); } @@ -64,12 +64,13 @@ template inline void llk_math_eltwise_unary_sfpu_isnan_init() { llk_math_eltwise_unary_sfpu_init(); } + template inline void llk_math_eltwise_unary_sfpu_isnan(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_isinf_isnan, - ckernel::sfpu::calculate_sfpu_isinf_isnan, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_isinf_isnan, + dst_index, + vector_mode); } @@ -78,12 +79,13 @@ template inline void llk_math_eltwise_unary_sfpu_isfinite_init() { llk_math_eltwise_unary_sfpu_init(); } + template inline void llk_math_eltwise_unary_sfpu_isfinite(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_isinf_isnan, - ckernel::sfpu::calculate_sfpu_isinf_isnan, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_isinf_isnan, + dst_index, + vector_mode); } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_logical_not_noti.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_logical_not_noti.h index f5fb85fc935..4e074512a29 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_logical_not_noti.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_logical_not_noti.h @@ -4,10 +4,8 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_logical_not_noti.h" namespace ckernel { @@ -18,12 +16,13 @@ template inline void llk_math_eltwise_unary_sfpu_logical_not_unary_init() { llk_math_eltwise_unary_sfpu_init(); } + template inline void llk_math_eltwise_unary_sfpu_logical_not_unary_op(uint dst_index) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_logical_not_unary, - ckernel::sfpu::calculate_logical_not_unary, - dst_index, VectorMode::RC); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_logical_not_unary, + dst_index, + (int)VectorMode::RC); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_mask.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_mask.h index 2b4551100af..b51a33b4230 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_mask.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_mask.h @@ -4,10 +4,8 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_mask.h" namespace ckernel { @@ -20,12 +18,11 @@ inline void llk_math_eltwise_unary_sfpu_mask_init() { } template -inline void llk_math_eltwise_unary_sfpu_mask(uint dst_index, int vector_mode = VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_mask, - ckernel::sfpu::calculate_mask, - dst_index, vector_mode); +inline void llk_math_eltwise_unary_sfpu_mask(uint dst_index, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_mask, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_min.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_min.h index 2b4a4150b30..0c356c5631d 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_min.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_min.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_min.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_min_init() { template inline void llk_math_eltwise_unary_sfpu_min(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_min, - ckernel::sfpu::calculate_min, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_min, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_negative.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_negative.h index 57cb1627daf..1e830ded444 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_negative.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_negative.h @@ -4,10 +4,8 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_negative.h" namespace ckernel { @@ -20,11 +18,11 @@ inline void llk_math_eltwise_unary_sfpu_negative_init() { } template -inline void llk_math_eltwise_unary_sfpu_negative(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_negative, - ckernel::sfpu::calculate_negative, - dst_index, vector_mode); +inline void llk_math_eltwise_unary_sfpu_negative(uint dst_index, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_negative, + dst_index, + vector_mode); } } // namespace ckernel diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_1_param.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_params.h similarity index 86% rename from tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_1_param.h rename to tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_params.h index 124d15c44df..e72732966f5 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_1_param.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_params.h @@ -5,14 +5,12 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_common_includes.h" - -template -inline void llk_math_eltwise_unary_sfpu_1_param( - void (*first_func)(PARAMTYPE), - void (*func)(PARAMTYPE), +template +inline void llk_math_eltwise_unary_sfpu_params( + F&& sfpu_func, uint dst_index, int vector_mode = VectorMode::RC, - int param0 = 0) { + ARGS&& ... args) { math::set_dst_write_addr(dst_index); @@ -21,7 +19,7 @@ inline void llk_math_eltwise_unary_sfpu_1_param( const int ITERATIONS = 1; #pragma GCC unroll 0 for (int face = 0; face < 2; face++) { - first_func(param0); + sfpu_func(static_cast(args)...); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); } @@ -34,7 +32,7 @@ inline void llk_math_eltwise_unary_sfpu_1_param( // Do a column vector, Face0 + Face2 -- full face #pragma GCC unroll 0 for (int face = 0; face < 2; face++) { - func(param0); + sfpu_func(static_cast(args)...); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); @@ -44,7 +42,7 @@ inline void llk_math_eltwise_unary_sfpu_1_param( #pragma GCC unroll 0 // Do all four faces, and iterate through all 4 blocks of 4 rows each for (int face = 0; face < 4; face++) { - func(param0); + sfpu_func(static_cast(args)...); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_power.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_power.h index 7e1645b78d2..8ba39a0c470 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_power.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_power.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_power_iterative.h" namespace ckernel { @@ -19,10 +19,11 @@ inline void llk_math_eltwise_unary_sfpu_power_init() { template inline void llk_math_eltwise_unary_sfpu_power(uint dst_index, int pow = 0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_power_iterative, - ckernel::sfpu::calculate_power_iterative, - dst_index, vector_mode, pow); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_power_iterative, + dst_index, + vector_mode, + pow); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_recip.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_recip.h index 2eae6433158..1d5807adf1e 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_recip.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_recip.h @@ -4,9 +4,8 @@ #pragma once -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_recip.h" namespace ckernel { @@ -14,12 +13,11 @@ namespace ckernel { // New LLK SFPU APIs template -inline void llk_math_eltwise_unary_sfpu_reciprocal(uint dst_index, int vector_mode = VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_reciprocal, - ckernel::sfpu::calculate_reciprocal, - dst_index, vector_mode); +inline void llk_math_eltwise_unary_sfpu_reciprocal(uint dst_index, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_reciprocal, + dst_index, + vector_mode); } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h index 56f16e90a64..e5b0cf849dc 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h @@ -7,7 +7,7 @@ #include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_relu.h" namespace ckernel { @@ -21,10 +21,11 @@ inline void llk_math_eltwise_unary_sfpu_relu_max_init() { } template inline void llk_math_eltwise_unary_sfpu_relu_max(uint dst_index, uint param0) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::relu_max, - ckernel::sfpu::relu_max, - dst_index, VectorMode::RC, param0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::relu_max, + dst_index, + VectorMode::RC, + param0); } // RELU MIN @@ -35,10 +36,11 @@ inline void llk_math_eltwise_unary_sfpu_relu_min_init() { template inline void llk_math_eltwise_unary_sfpu_relu_min(uint dst_index, uint param0 = 0) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::relu_min, - ckernel::sfpu::relu_min, - dst_index, VectorMode::RC, param0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::relu_min, + dst_index, + VectorMode::RC, + param0); } // RELU @@ -46,10 +48,11 @@ inline void llk_math_eltwise_unary_sfpu_relu_min(uint dst_index, uint param0 = 0 //relu = relu_min @ threshold = 0 template inline void llk_math_eltwise_unary_sfpu_relu(uint dst_index) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::relu_min, - ckernel::sfpu::relu_min, - dst_index, VectorMode::RC, 0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::relu_min, + dst_index, + VectorMode::RC, + 0); } @@ -67,10 +70,11 @@ inline void llk_math_eltwise_unary_sfpu_lrelu_init() { template inline void llk_math_eltwise_unary_sfpu_lrelu(uint dst_index, int param0 = 0) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_lrelu, - ckernel::sfpu::calculate_lrelu, - dst_index, VectorMode::RC, param0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_lrelu, + dst_index, + VectorMode::RC, + param0); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_reverseops.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_reverseops.h index d116d57c80f..c960d7e9aea 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_reverseops.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_reverseops.h @@ -3,9 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once -#include "llk_math_eltwise_unary_sfpu_common_includes.h" + #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_reverseops.h" @@ -20,10 +20,11 @@ namespace ckernel { template inline void llk_math_eltwise_unary_sfpu_rsub(uint dst_index, uint param0 = 0) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_rsub, - ckernel::sfpu::calculate_rsub, - dst_index, VectorMode::RC, param0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_rsub, + dst_index, + (int)VectorMode::RC, + param0); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_rsqrt.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_rsqrt.h index 8a552734aa4..8aecb2f3e1c 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_rsqrt.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_rsqrt.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_rsqrt.h" namespace ckernel { @@ -24,15 +24,14 @@ inline void llk_math_eltwise_unary_sfpu_rsqrt(uint dst_index, int vector_mode = // The algorithm uses Newton's method based on no.of iteration better approximation can be calculated // if (APPROXIMATE) { - // llk_math_eltwise_unary_sfpu_0_param - // (ckernel::sfpu::calculate_rsqrt, - // ckernel::sfpu::calculate_rsqrt, + // llk_math_eltwise_unary_sfpu_params( + // ckernel::sfpu::calculate_rsqrt, // dst_index, vector_mode); // } else { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_rsqrt, - ckernel::sfpu::calculate_rsqrt, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_rsqrt, + dst_index, + vector_mode); // } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid.h index d4b3dd07456..73f0b11d18e 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_sigmoid.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_sigmoid_init() { template inline void llk_math_eltwise_unary_sfpu_sigmoid(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sigmoid, - ckernel::sfpu::calculate_sigmoid, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sigmoid, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid_appx.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid_appx.h index d38f55aac38..78a8f41c3ea 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid_appx.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sigmoid_appx.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_sigmoid_appx.h" namespace ckernel { @@ -14,15 +14,15 @@ namespace ckernel { template inline void llk_math_eltwise_unary_sfpu_sigmoid_appx_init() { - llk_math_eltwise_unary_sfpu_init(sfpu::sigmoid_appx_init); + llk_math_eltwise_unary_sfpu_init(); } template inline void llk_math_eltwise_unary_sfpu_sigmoid_appx(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sigmoid_appx, - ckernel::sfpu::calculate_sigmoid_appx, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sigmoid_appx, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sign.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sign.h index 8af4f84ae67..7cfd7280628 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sign.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sign.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_sign.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_sign_init() { template inline void llk_math_eltwise_unary_sfpu_sign(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sign, - ckernel::sfpu::calculate_sign, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sign, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_signbit.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_signbit.h index 7c6788e21d1..9ec0bc0cbcc 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_signbit.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_signbit.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_signbit.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_signbit_init() { template inline void llk_math_eltwise_unary_sfpu_signbit(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_signbit, - ckernel::sfpu::calculate_signbit, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_signbit, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_silu.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_silu.h index 65b7bedf5eb..7f46202625f 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_silu.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_silu.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_silu.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_silu_init() { template inline void llk_math_eltwise_unary_sfpu_silu(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_silu, - ckernel::sfpu::calculate_silu, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_silu, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sqrt.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sqrt.h index 023ca322928..2dc1e46068e 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sqrt.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_sqrt.h @@ -3,9 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once -#include "llk_math_eltwise_unary_sfpu_common_includes.h" + #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_sqrt.h" namespace ckernel { @@ -13,12 +13,11 @@ namespace ckernel { // New LLK SFPU APIs template -inline void llk_math_eltwise_unary_sfpu_sqrt(uint dst_index, int vector_mode = VectorMode::RC) { - constexpr int first_iterations = 1; - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sqrt, - ckernel::sfpu::calculate_sqrt, - dst_index, vector_mode); +inline void llk_math_eltwise_unary_sfpu_sqrt(uint dst_index, int vector_mode = (int)VectorMode::RC) { + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sqrt, + dst_index, + vector_mode); } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tiled_prod.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tiled_prod.h index 929a71a8815..e6d00f887c8 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tiled_prod.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_tiled_prod.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_tiled_prod.h" namespace ckernel { @@ -19,10 +19,10 @@ inline void llk_math_eltwise_unary_sfpu_tiled_prod_init() { template inline void llk_math_eltwise_unary_sfpu_tiled_prod(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_tiled_prod, - ckernel::sfpu::calculate_tiled_prod, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_tiled_prod, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_topk.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_topk.h index 7c7313e45b2..345d06d8151 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_topk.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_topk.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_topk.h" namespace ckernel { @@ -22,29 +22,29 @@ inline void llk_math_eltwise_unary_sfpu_topk_init() { template inline void llk_math_eltwise_unary_sfpu_topk_local_sort(uint dst_index, int idir, int i_end_phase, int i_start_phase, int i_end_step, int i_start_step, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_bitonic_topk_phases_steps, - ckernel::sfpu::calculate_bitonic_topk_phases_steps, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_bitonic_topk_phases_steps, + dst_index, + vector_mode); } // llk_math_eltwise_unary_sfpu_topk_merge is unused for Grayskull template inline void llk_math_eltwise_unary_sfpu_topk_merge(uint dst_index, int m_iter, int k, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_bitonic_topk_merge, - ckernel::sfpu::calculate_bitonic_topk_merge, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_bitonic_topk_merge, + dst_index, + vector_mode); } // llk_math_eltwise_unary_sfpu_topk_rebuild is unused for Grayskull template inline void llk_math_eltwise_unary_sfpu_topk_rebuild(uint dst_index, bool idir, int m_iter, int k, int logk, int skip_second, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_bitonic_topk_rebuild, - ckernel::sfpu::calculate_bitonic_topk_rebuild, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_bitonic_topk_rebuild, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_trigonometry.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_trigonometry.h index 5982c780ed6..d99ca4631c6 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_trigonometry.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_trigonometry.h @@ -4,10 +4,8 @@ #pragma once - -#include "llk_math_eltwise_unary_sfpu_common_includes.h" #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_0_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_trigonometry.h" namespace ckernel { @@ -22,10 +20,10 @@ inline void llk_math_eltwise_unary_sfpu_sine_init() { template inline void llk_math_eltwise_unary_sfpu_sine_op(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_trig, - ckernel::sfpu::calculate_sfpu_trig, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_trig, + dst_index, + vector_mode); } @@ -37,10 +35,10 @@ inline void llk_math_eltwise_unary_sfpu_cosine_init() { template inline void llk_math_eltwise_unary_sfpu_cosine_op(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_trig, - ckernel::sfpu::calculate_sfpu_trig, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_trig, + dst_index, + vector_mode); } @@ -52,11 +50,10 @@ inline void llk_math_eltwise_unary_sfpu_tan_init() { template inline void llk_math_eltwise_unary_sfpu_tan_op(uint dst_index, int vector_mode = VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_sfpu_trig, - ckernel::sfpu::calculate_sfpu_trig, - dst_index, vector_mode); - + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_sfpu_trig, + dst_index, + vector_mode); } //asin @@ -67,10 +64,10 @@ inline void llk_math_eltwise_unary_sfpu_asin_init() { template inline void llk_math_eltwise_unary_sfpu_asin(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_asin, - ckernel::sfpu::calculate_asin, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_asin, + dst_index, + vector_mode); } //acos @@ -81,10 +78,10 @@ inline void llk_math_eltwise_unary_sfpu_acos_init() { template inline void llk_math_eltwise_unary_sfpu_acos(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_acos, - ckernel::sfpu::calculate_acos, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_acos, + dst_index, + vector_mode); } //atan @@ -95,10 +92,10 @@ inline void llk_math_eltwise_unary_sfpu_atan_init() { template inline void llk_math_eltwise_unary_sfpu_atan(uint dst_index, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_0_param - (ckernel::sfpu::calculate_atan, - ckernel::sfpu::calculate_atan, - dst_index, vector_mode); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_atan, + dst_index, + vector_mode); } } diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h index b2089ae6168..02e720c837b 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_unary_comp.h @@ -5,7 +5,7 @@ #pragma once #include "llk_math_eltwise_unary_sfpu_init.h" -#include "llk_math_eltwise_unary_sfpu_1_param.h" +#include "llk_math_eltwise_unary_sfpu_params.h" #include "ckernel_sfpu_unary_comp.h" namespace ckernel { @@ -20,10 +20,11 @@ inline void llk_math_eltwise_unary_sfpu_unary_ne_init() { template inline void llk_math_eltwise_unary_sfpu_unary_ne(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_unary_ne, - ckernel::sfpu::calculate_unary_ne, - dst_index, vector_mode, param0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_unary_ne, + dst_index, + vector_mode, + param0); } //Unary greater than @@ -34,12 +35,14 @@ inline void llk_math_eltwise_unary_sfpu_unary_gt_init() { template inline void llk_math_eltwise_unary_sfpu_unary_gt(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_unary_gt, - ckernel::sfpu::calculate_unary_gt, - dst_index, vector_mode, param0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_unary_gt, + dst_index, + vector_mode, + param0); } + //Unary lesser than template inline void llk_math_eltwise_unary_sfpu_unary_lt_init() { @@ -48,10 +51,10 @@ inline void llk_math_eltwise_unary_sfpu_unary_lt_init() { template inline void llk_math_eltwise_unary_sfpu_unary_lt(uint dst_index, uint param0, int vector_mode = (int)VectorMode::RC) { - llk_math_eltwise_unary_sfpu_1_param - (ckernel::sfpu::calculate_unary_lt, - ckernel::sfpu::calculate_unary_lt, - dst_index, vector_mode, param0); + llk_math_eltwise_unary_sfpu_params( + ckernel::sfpu::calculate_unary_lt, + dst_index, + vector_mode, + param0); } - }