From ae9b42dcd06f5179223452cb045e27f9f1854b2d Mon Sep 17 00:00:00 2001 From: Radomir Djogo Date: Mon, 9 Dec 2024 22:22:09 +0000 Subject: [PATCH] #15122: Fix add int32 LLK --- .../llk_api/llk_sfpu/ckernel_sfpu_add_int32.h | 4 ++-- .../llk_math_eltwise_binary_sfpu_add_int32.h | 4 ++-- .../llk_api/llk_sfpu/ckernel_sfpu_add_int32.h | 4 ++-- .../llk_math_eltwise_binary_sfpu_add_int32.h | 4 ++-- .../include/compute_kernel_api/add_int32_sfpu.h | 17 ++++++++++------- tt_metal/third_party/tt_llk_blackhole | 2 +- tt_metal/third_party/tt_llk_wormhole_b0 | 2 +- 7 files changed, 20 insertions(+), 17 deletions(-) diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_add_int32.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_add_int32.h index fff976fbf0b..ac685fb0d75 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_add_int32.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_add_int32.h @@ -13,9 +13,9 @@ using namespace sfpi; namespace ckernel { namespace sfpu { -template +template inline void calculate_add_int32(const uint dst_offset) { - _add_int32_(dst_offset); + _add_int32_(dst_offset); } } // namespace sfpu diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_binary_sfpu_add_int32.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_binary_sfpu_add_int32.h index db9d2579956..907847502f9 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_binary_sfpu_add_int32.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_binary_sfpu_add_int32.h @@ -17,11 +17,11 @@ inline void llk_math_eltwise_binary_sfpu_add_int32_init() { llk_math_eltwise_binary_sfpu_init(); } -template +template inline void llk_math_eltwise_binary_sfpu_add_int32( uint dst_index0, uint32_t dst_index1, int vector_mode = VectorMode::RC) { llk_math_eltwise_binary_sfpu_params( - ckernel::sfpu::calculate_add_int32, dst_index0, dst_index1, vector_mode); + ckernel::sfpu::calculate_add_int32, dst_index0, dst_index1, vector_mode); } } // namespace ckernel diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_add_int32.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_add_int32.h index fff976fbf0b..ac685fb0d75 100644 --- a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_add_int32.h +++ b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_add_int32.h @@ -13,9 +13,9 @@ using namespace sfpi; namespace ckernel { namespace sfpu { -template +template inline void calculate_add_int32(const uint dst_offset) { - _add_int32_(dst_offset); + _add_int32_(dst_offset); } } // namespace sfpu diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/llk_math_eltwise_binary_sfpu_add_int32.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/llk_math_eltwise_binary_sfpu_add_int32.h index db9d2579956..907847502f9 100644 --- a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/llk_math_eltwise_binary_sfpu_add_int32.h +++ b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/llk_math_eltwise_binary_sfpu_add_int32.h @@ -17,11 +17,11 @@ inline void llk_math_eltwise_binary_sfpu_add_int32_init() { llk_math_eltwise_binary_sfpu_init(); } -template +template inline void llk_math_eltwise_binary_sfpu_add_int32( uint dst_index0, uint32_t dst_index1, int vector_mode = VectorMode::RC) { llk_math_eltwise_binary_sfpu_params( - ckernel::sfpu::calculate_add_int32, dst_index0, dst_index1, vector_mode); + ckernel::sfpu::calculate_add_int32, dst_index0, dst_index1, vector_mode); } } // namespace ckernel diff --git a/tt_metal/include/compute_kernel_api/add_int32_sfpu.h b/tt_metal/include/compute_kernel_api/add_int32_sfpu.h index 89103555821..4de5ee5b55a 100644 --- a/tt_metal/include/compute_kernel_api/add_int32_sfpu.h +++ b/tt_metal/include/compute_kernel_api/add_int32_sfpu.h @@ -24,15 +24,18 @@ namespace ckernel { * * Return value: None * - * | Argument | Description | Type | Valid Range | - * Required | - * |----------------|-----------------------------------------------------------------------|----------|-------------------------------------------------------|----------| - * | idst0 | The index of the tile in DST register buffer to use as first operand | uint32_t | Must be less - * than the size of the DST register buffer | True | | idst1 | The index of the tile in DST register buffer - * to use as second operand | uint32_t | Must be less than the size of the DST register buffer | True | + * | Argument | Description | Type | + * Valid Range | Required | + * |-----------------------|-----------------------------------------------------------------------------|----------|-------------------------------------------------------|----------| + * | idst0 | The index of the tile in DST register buffer to use as first operand | uint32_t | + * Must be less than the size of the DST register buffer | True | | idst1 | The index of the tile in + * DST register buffer to use as second operand | uint32_t | Must be less than the size of the DST register buffer + * | True | | sign_magnitude_format | Whether the Int32 values are in sign-magnitude format (not 2's complement) | + * bool | | False | */ +template ALWI void add_int32_tile(uint32_t idst0, uint32_t idst1) { - MATH((llk_math_eltwise_binary_sfpu_add_int32(idst0, idst1))); + MATH((llk_math_eltwise_binary_sfpu_add_int32(idst0, idst1))); } /** diff --git a/tt_metal/third_party/tt_llk_blackhole b/tt_metal/third_party/tt_llk_blackhole index 8b5afa5b0f9..4f8a304dbcd 160000 --- a/tt_metal/third_party/tt_llk_blackhole +++ b/tt_metal/third_party/tt_llk_blackhole @@ -1 +1 @@ -Subproject commit 8b5afa5b0f92841f13d49263482bdde6aaeef4ca +Subproject commit 4f8a304dbcd4f85e3211f2a5fb9a86963e47f6d9 diff --git a/tt_metal/third_party/tt_llk_wormhole_b0 b/tt_metal/third_party/tt_llk_wormhole_b0 index ed02df9eb4b..216533851de 160000 --- a/tt_metal/third_party/tt_llk_wormhole_b0 +++ b/tt_metal/third_party/tt_llk_wormhole_b0 @@ -1 +1 @@ -Subproject commit ed02df9eb4bbfb37da1b9d9a8a129f1f6842a6cd +Subproject commit 216533851dea9d966ad2a350f07fcb1943beb711