Skip to content

Commit

Permalink
Fix load/store mode for add int32 (#48)
Browse files Browse the repository at this point in the history
* Add template arg for load/store mode
  • Loading branch information
rdjogoTT authored Dec 10, 2024
1 parent ed02df9 commit 0f57d4e
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions common/inc/sfpu/ckernel_sfpu_add_int32.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,26 @@ namespace ckernel
namespace sfpu
{

template <bool APPROXIMATION_MODE, int ITERATIONS>
template <bool APPROXIMATION_MODE, bool SIGN_MAGNITUDE_FORMAT, int ITERATIONS>
inline void _add_int32_(const uint dst_offset) {
// Use '12' if Dest is in sign-magnitude format and '4' for 2's complement,
// because TTI_SFPIADD requires 2's complement format in LREGs
constexpr int sfpload_instr_mod = SIGN_MAGNITUDE_FORMAT ? 12 : 4;

// Operand A is input1 (int32)
// Operand B is input2 (int32)
// Output is int32
#pragma GCC unroll 8
for (int d = 0; d < ITERATIONS; d++) {
// operand A - int32
TTI_SFPLOAD(0, 12, 3, 0);
TTI_SFPLOAD(0, sfpload_instr_mod, 3, 0);
// operand B - int32
TT_SFPLOAD(1, 12, 3, dst_offset * 64);
TT_SFPLOAD(1, sfpload_instr_mod, 3, dst_offset * 64);
TTI_SFPIADD(0, 1, 0, 4);
// MAD has a 2-cycle pipeline latency so we need one cycle latency until next instr can consume the result
TTI_NOP;
// LREG_0 -> dest as int32
TTI_SFPSTORE(0, 12, 3, 0);
TTI_SFPSTORE(0, sfpload_instr_mod, 3, 0);
dst_reg++;
}
}
Expand Down

0 comments on commit 0f57d4e

Please sign in to comment.