-
Notifications
You must be signed in to change notification settings - Fork 87
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
### Ticket Link to Github Issue #13676 ### Problem description - Current implementation of `i0_bw` uses `reciprocal` op which has an ongoing issue #14672 - goal is to reimplement `i0_bw` using `i1` with kernel implementation ### What's changed - implemented `ttnn::i1` using eltwise unary kernel with pcc > 0.9999 <img width="1512" alt="Screenshot 2024-11-23 at 1 21 33 AM" src="https://github.com/user-attachments/assets/8e3aad76-f08c-46fa-bb37-c172c8125040"> - Reimplemented `ttnn.i0_bw` using i1 which gives a pcc ~ 0.9998 <img width="1512" alt="Screenshot 2024-11-23 at 1 22 44 AM" src="https://github.com/user-attachments/assets/05e10f0c-a74f-41b7-a285-acfdf42e9637"> - Updated sweeps of i0_bw <img width="1512" alt="Screenshot 2024-11-23 at 1 33 35 AM" src="https://github.com/user-attachments/assets/0d6be449-fcac-4b6b-8d40-2477eb2fe9b6"> - Profiling : On main vs On branch op,count,python min dispatch time (ms),python mean dispatch time(ms),python mean dispatch + sync time (ms),C++ mean dispatch time (ms) **ttnn.i0_bw,800,0.97,0.997,3.717,0.364** (main ) **ttnn.i0_bw,800,0.06,0.062,0.251,0.021** (branch) ### Checklist - [x] Post commit CI passes https://github.com/tenstorrent/tt-metal/actions/runs/12065876004 - [ ] Blackhole Post commit (if applicable) - [ ] Model regression CI testing passes (if applicable) - [ ] Device performance regression CI testing passes (if applicable) - [x] New/Existing tests provide coverage for changes
- Loading branch information
1 parent
592214d
commit f401c2e
Showing
26 changed files
with
434 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
|
||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
import pytest | ||
|
||
import torch | ||
|
||
import ttnn | ||
from models.utility_functions import skip_for_grayskull | ||
|
||
|
||
@skip_for_grayskull("Unsupported dtype for Grayskull")
@pytest.mark.parametrize(
    "shapes",
    [
        [1, 1, 32, 32],
        [4, 2, 96, 192],
        [4, 7, 21, 133],
        [4, 6, 105, 245],
        [64, 64],
        [3, 128, 512],
    ],
)
def test_i1_range(device, shapes):
    """Compare ttnn.i1 with torch.special.i1 on float32 inputs drawn uniformly from [-10, 10)."""
    torch.manual_seed(0)

    low, high = -10, 10
    torch_input = torch.rand(shapes, dtype=torch.float32) * (high - low) + low
    expected = torch.special.i1(torch_input)

    tt_input = ttnn.from_torch(
        torch_input,
        layout=ttnn.TILE_LAYOUT,
        dtype=ttnn.float32,
        device=device,
        memory_config=ttnn.DRAM_MEMORY_CONFIG,
    )
    tt_output = ttnn.i1(tt_input, memory_config=ttnn.DRAM_MEMORY_CONFIG)
    actual = ttnn.to_torch(tt_output)

    # NOTE(review): a Pearson correlation is unchanged by scaling or offsetting one of the
    # tensors, so this check by itself cannot catch a constant-factor error in the kernel.
    # Consider adding an element-wise tolerance check as well.
    assert ttnn.pearson_correlation_coefficient(expected, actual) >= 0.9999
|
||
|
||
@skip_for_grayskull("Unsupported dtype for Grayskull")
@pytest.mark.parametrize(
    "shapes",
    [
        [4, 2, 96, 192],
        [1, 1, 64, 64],
    ],
)
def test_i1_zero(device, shapes):
    """Check that ttnn.i1 maps an all-zero bfloat16 tensor to the same values as torch.special.i1."""
    torch.manual_seed(0)

    torch_input_tensor_a = torch.zeros(shapes, dtype=torch.float32)
    torch_output_tensor = torch.special.i1(torch_input_tensor_a)

    input_tensor_a = ttnn.from_torch(
        torch_input_tensor_a,
        layout=ttnn.TILE_LAYOUT,
        dtype=ttnn.bfloat16,
        device=device,
        memory_config=ttnn.DRAM_MEMORY_CONFIG,
    )
    output_tensor = ttnn.i1(input_tensor_a, memory_config=ttnn.DRAM_MEMORY_CONFIG)
    output_tensor = ttnn.to_torch(output_tensor)

    # Both tensors are constant here, so a correlation coefficient is mathematically
    # undefined (zero variance). Compare the values directly so the test does not rely
    # solely on how the PCC helper treats that degenerate case.
    assert torch.equal(output_tensor.to(torch.float32), torch_output_tensor)

    assert ttnn.pearson_correlation_coefficient(torch_output_tensor, output_tensor) >= 0.9999
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_i1.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#pragma once | ||
|
||
#include "ckernel.h" | ||
#include "ckernel_defs.h" | ||
#include "noc_nonblocking_api.h" | ||
|
||
using namespace sfpi; | ||
|
||
namespace ckernel { | ||
|
||
namespace sfpu { | ||
|
||
#define POLYVAL10_I1(coef10, coef9, coef8, coef7, coef6, coef5, coef4, coef3, coef2, coef1, coef0, t2) \ | ||
((coef0 + \ | ||
(coef1 + \ | ||
(coef2 + \ | ||
(coef3 + \ | ||
(coef4 + (coef5 + (coef6 + (coef7 + (coef8 + (coef9 + coef10 * t2) * t2) * t2) * t2) * t2) * t2) * t2) * \ | ||
t2) * \ | ||
t2) * \ | ||
t2) * \ | ||
t2) | ||
|
||
template <bool APPROXIMATION_MODE, int ITERATIONS = 8> | ||
inline void calculate_i1() { | ||
#pragma GCC unroll 0 | ||
|
||
for (int d = 0; d < ITERATIONS; d++) { | ||
vFloat result = 0.0f; | ||
vFloat input = dst_reg[0]; | ||
vFloat x = input * input; | ||
|
||
vFloat derivative = input * POLYVAL10_I1( | ||
1.24695e-23f, | ||
6.58387e-21f, | ||
2.8969e-18f, | ||
1.04289e-15f, | ||
3.00351e-13f, | ||
6.72786e-11f, | ||
1.13028e-08f, | ||
1.35634e-06f, | ||
0.000108507f, | ||
0.00520833f, | ||
0.125f, | ||
x); | ||
result = input * 0.5f + derivative; | ||
dst_reg[0] = result; | ||
dst_reg++; | ||
} | ||
} | ||
|
||
} // namespace sfpu | ||
} // namespace ckernel |
26 changes: 26 additions & 0 deletions
26
tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i1.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#pragma once | ||
|
||
#include "llk_math_eltwise_unary_sfpu_init.h" | ||
#include "llk_math_eltwise_unary_sfpu_params.h" | ||
#include "ckernel_sfpu_i1.h" | ||
|
||
namespace ckernel { | ||
|
||
// New LLK SFPU APIs | ||
|
||
template <bool APPROXIMATE> | ||
inline void llk_math_eltwise_unary_sfpu_i1_init() { | ||
llk_math_eltwise_unary_sfpu_init<SfpuType::i1, APPROXIMATE>(); | ||
} | ||
|
||
template <bool APPROXIMATE> | ||
inline void llk_math_eltwise_unary_sfpu_i1_op(uint dst_index) { | ||
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>( | ||
ckernel::sfpu::calculate_i1<APPROXIMATE>, dst_index, (int)VectorMode::RC); | ||
} | ||
|
||
} // namespace ckernel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,7 @@ enum SfpuType { | |
logical_not_unary, | ||
erfinv, | ||
i0, | ||
i1, | ||
silu, | ||
mask, | ||
negative, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
58 changes: 58 additions & 0 deletions
58
tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_i1.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#pragma once | ||
|
||
#include "ckernel.h" | ||
#include "ckernel_defs.h" | ||
#include "noc_nonblocking_api.h" | ||
|
||
#include "sfpi.h" | ||
|
||
using namespace sfpi; | ||
|
||
namespace ckernel { | ||
namespace sfpu { | ||
|
||
#define POLYVAL10_I1(coef10, coef9, coef8, coef7, coef6, coef5, coef4, coef3, coef2, coef1, coef0, t2) \ | ||
((coef0 + \ | ||
(coef1 + \ | ||
(coef2 + \ | ||
(coef3 + \ | ||
(coef4 + (coef5 + (coef6 + (coef7 + (coef8 + (coef9 + coef10 * t2) * t2) * t2) * t2) * t2) * t2) * t2) * \ | ||
t2) * \ | ||
t2) * \ | ||
t2) * \ | ||
t2) | ||
|
||
template <bool APPROXIMATION_MODE, int ITERATIONS> | ||
inline void calculate_i1() { | ||
#pragma GCC unroll 0 | ||
|
||
for (int d = 0; d < ITERATIONS; d++) { | ||
vFloat result = 0.0f; | ||
vFloat input = dst_reg[0]; | ||
vFloat x = input * input; | ||
|
||
vFloat derivative = input * POLYVAL10_I1( | ||
1.24695e-23f, | ||
6.58387e-21f, | ||
2.8969e-18f, | ||
1.04289e-15f, | ||
3.00351e-13f, | ||
6.72786e-11f, | ||
1.13028e-08f, | ||
1.35634e-06f, | ||
0.000108507f, | ||
0.00520833f, | ||
0.125f, | ||
x); | ||
result = input * 0.5f + derivative; | ||
dst_reg[0] = result; | ||
dst_reg++; | ||
} | ||
} | ||
|
||
} // namespace sfpu | ||
} // namespace ckernel |
26 changes: 26 additions & 0 deletions
26
tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_i1.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#pragma once | ||
|
||
#include "llk_math_eltwise_unary_sfpu_init.h" | ||
#include "llk_math_eltwise_unary_sfpu_params.h" | ||
#include "ckernel_sfpu_i1.h" | ||
|
||
namespace ckernel { | ||
|
||
// New LLK SFPU APIs | ||
|
||
template <bool APPROXIMATE> | ||
inline void llk_math_eltwise_unary_sfpu_i1_init() { | ||
llk_math_eltwise_unary_sfpu_init<SfpuType::i1, APPROXIMATE>(); | ||
} | ||
|
||
template <bool APPROXIMATE> | ||
inline void llk_math_eltwise_unary_sfpu_i1_op(uint dst_index) { | ||
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>( | ||
ckernel::sfpu::calculate_i1<APPROXIMATE, 4>, dst_index, (int)VectorMode::RC); | ||
} | ||
|
||
} // namespace ckernel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,7 @@ enum SfpuType { | |
logical_not_unary, | ||
erfinv, | ||
i0, | ||
i1, | ||
silu, | ||
mask, | ||
negative, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.