Skip to content

Commit

Permalink
Revert "Revert "#5424: Delegated sfpu reciprocal calls to wh_b0 submo…
Browse files Browse the repository at this point in the history
…dule functions"" (#10171)

Revert "Revert "#5424: Delegated sfpu reciprocal calls to wh_b0 submodule functions""

This reverts commit 1e8efd3.
  • Loading branch information
TT-billteng authored Jul 11, 2024
1 parent 95d4dd2 commit 99b2214
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,80 +9,30 @@
#include "noc_nonblocking_api.h"

#include "sfpi.h"
#include "sfpu/ckernel_sfpu_recip.h"
using namespace sfpi;

namespace ckernel
{
namespace sfpu
{

// Returns an estimate of the reciprocal of `in`.
//
// NOTE(review): this span reads like a rendered diff with removed and added lines
// interleaved: there are two template headers and two return statements (the last one
// delegates to _sfpu_reciprocal_<max_iter>). Confirm which lines are live before editing.
//
// Template parameters:
//   max_iter - total number of refinement steps in the inline body (one initial step
//              plus max_iter - 1 loop iterations); also forwarded to _sfpu_reciprocal_.
//   save_reg - selects between the literal 2.0 and the `two` register in the inline body.
//
// The inline body forces the working value negative and never re-applies the sign of `in`;
// calculate_reciprocal negates the result itself for negative inputs.
template <int max_iter = 3,bool save_reg=true>
template <int max_iter = 3,bool save_reg=true /* Unused. Enough registers available. */>
sfpi_inline vFloat sfpu_reciprocal(const vFloat in)
{
// Force sign to 1 (make number negative)
vFloat val = setsgn(in, 1);

val = setexp(val, 126); // Set exponent to 126 to make the number in 0.5-1
// Use 1.44 as first guess at x, ideal value would be 1.33, but we happen to have 1.44 available, so use that to avoid a load
// (recip_init writes 1.442695f into vConstFloatPrgm0.)
vFloat vConstLn2Recip = vConstFloatPrgm0;

// NOTE(review): `two` is assigned only when save_reg is true, yet the expressions below
// use the literal 2.0 when save_reg is true and `two` otherwise. That looks inverted
// (`two` would be read unassigned when save_reg is false) - confirm the intent.
vFloat two;
if constexpr (save_reg) {
two = vConstFloatPrgm1;
}

// First refinement step. val is negative, so (val * r + 2) equals (2 - |val| * r) and
// r * (val * r + 2) is one Newton-Raphson step towards 1 / |val|.
vFloat result = vConstLn2Recip * (val * vConstLn2Recip + (save_reg ? 2.0 : two));

// Remaining max_iter - 1 refinement steps of the same form.
for (int s_iter = 0; s_iter < (max_iter-1); s_iter++) {
result = result * (val * result + (save_reg ? 2.0 : two));
}

// The mantissa-only estimate above ignored the exponent of `in`; fold it back in here.
// presumably exexp returns the exponent with the bias removed - TODO confirm against sfpi docs
vInt orig_exp = exexp(in);
vInt new_exp = exexp(result);

// "Subtract" exponents, and re-bias.
// Execute: -1 - exp, then exp += 127
// NOTE(review): the code below subtracts orig_exp and then adds 126, not 127 - presumably
// the -1 and +127 from the line above are combined; confirm.
new_exp -= orig_exp;
new_exp += 126;

v_if (new_exp < 0) {
// If rebiased exponent is negative, we need to saturate at 0.
// This means the initial number was too big so reciprocal result should be 0
result = 0.0F;
new_exp = 0;
}
v_endif;

// Set newly denormalized exponent to result exponent field
return setexp(result, new_exp);
// Delegating form: hand the whole computation to _sfpu_reciprocal_ (not defined in this
// view; presumably provided by the included sfpu/ckernel_sfpu_recip.h - TODO confirm).
return _sfpu_reciprocal_<max_iter>(in);
}


// Applies the reciprocal to ITERATIONS consecutive dst_reg entries.
//
// NOTE(review): this span reads like a rendered diff with removed and added lines
// interleaved: an inline per-entry loop is followed by a single delegating call to
// _calculate_reciprocal_. Confirm which lines are live before editing.
//
// Template parameters:
//   APPROXIMATION_MODE - when true the inline loop asks sfpu_reciprocal for 2 refinement
//                        steps instead of 3; also forwarded to _calculate_reciprocal_.
//   ITERATIONS         - number of dst_reg entries processed by the inline loop; also
//                        forwarded (as template and call argument) to _calculate_reciprocal_.
template <bool APPROXIMATION_MODE, int ITERATIONS=8>
inline void calculate_reciprocal()
{
#pragma GCC unroll 8
for (int d = 0; d < ITERATIONS; d++)
{
// Read the current entry and compute its reciprocal estimate.
vFloat in = dst_reg[0];
vFloat out = sfpu_reciprocal<APPROXIMATION_MODE ? 2 : 3,true>(in);

v_if (in < 0.0F) {
// Invert sign on calculated value if CC=1 (number is negative)
out = -out;
}
v_endif;

// Write the result back in place, then advance dst_reg to the next entry.
dst_reg[0] = out;

dst_reg++;
}
// Delegating form: _calculate_reciprocal_ is not defined in this view (presumably provided
// by the included sfpu/ckernel_sfpu_recip.h - TODO confirm).
_calculate_reciprocal_<APPROXIMATION_MODE, ITERATIONS>(ITERATIONS);
}

// Sets up the programmable constants used by the reciprocal routines.
//
// NOTE(review): this span reads like a rendered diff with removed and added lines
// interleaved: two direct constant writes are followed by a delegating call to
// _init_reciprocal_. Confirm which lines are live before editing.
template <bool APPROXIMATION_MODE>
void recip_init() {
// Read back by sfpu_reciprocal as its initial guess (vConstFloatPrgm0) and as the
// constant 2 in its refinement step (vConstFloatPrgm1).
vConstFloatPrgm0 = 1.442695f; // ln2_recip
vConstFloatPrgm1 = 2.0f;
// Delegating form: _init_reciprocal_ is not defined in this view (presumably provided by
// the included sfpu/ckernel_sfpu_recip.h - TODO confirm).
_init_reciprocal_<APPROXIMATION_MODE>();
}

} // namespace sfpu
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/third_party/tt_llk_wormhole_b0

0 comments on commit 99b2214

Please sign in to comment.