-
Notifications
You must be signed in to change notification settings - Fork 87
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#11669: Profiler slow dispatch unit test
- Loading branch information
1 parent
42cca16
commit a402d6d
Showing
6 changed files
with
215 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
...est_custom_cycle_count_slow_dispatch/kernels/custom_cycle_count_compute_slow_dispatch.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include <cstdint> | ||
#include "compute_kernel_api.h" | ||
|
||
/** | ||
* LOOP_COUNT and LOOP_SIZE provide the ability to decide how many cycles this kernel takes. | ||
* With a large enough LOOP_COUNT and a LOOP_SIZE that fits within the icache, the cycle count will be | ||
* very close to LOOP_COUNT x (LOOP_SIZE + loop_overhead). loop_overhead is 2 cycles: 1 for | ||
* addi and 1 for branch if not zero. | ||
* | ||
* Keeping LOOP_SIZE constant and suitable for all 5 RISC icaches, the difference between two runs | ||
* with LOOP_COUNT and LOOP_COUNT + 1 should be the same across all RISCs, and it should be | ||
* LOOP_SIZE + 2 cycles. | ||
* | ||
* More info on tt-metal issue #515 | ||
* | ||
* https://github.com/tenstorrent/tt-metal/issues/515#issuecomment-1548434301 | ||
*/ | ||
|
||
namespace NAMESPACE { | ||
void MAIN { | ||
for (int i = 0; i < LOOP_COUNT; i ++) | ||
{ | ||
//Max unroll size | ||
#pragma GCC unroll 65534 | ||
for (int j = 0 ; j < LOOP_SIZE; j++) | ||
{ | ||
asm("nop"); | ||
} | ||
} | ||
} | ||
} // NAMESPACE |
32 changes: 32 additions & 0 deletions
32
...ofiler/test_custom_cycle_count_slow_dispatch/kernels/custom_cycle_count_slow_dispatch.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include <cstdint> | ||
|
||
/** | ||
* LOOP_COUNT and LOOP_SIZE provide the ability to decide how many cycles this kernel takes. | ||
* With a large enough LOOP_COUNT and a LOOP_SIZE that fits within the icache, the cycle count will be | ||
* very close to LOOP_COUNT x (LOOP_SIZE + loop_overhead). loop_overhead is 2 cycles: 1 for | ||
* addi and 1 for branch if not zero. | ||
* | ||
* Keeping LOOP_SIZE constant and suitable for all 5 RISC icaches, the difference between two runs | ||
* with LOOP_COUNT and LOOP_COUNT + 1 should be the same across all RISCs, and it should be | ||
* LOOP_SIZE + 2 cycles. | ||
* | ||
* More info on tt-metal issue #515 | ||
* | ||
* https://github.com/tenstorrent/tt-metal/issues/515#issuecomment-1548434301 | ||
*/ | ||
|
||
void kernel_main() { | ||
for (int i = 0; i < LOOP_COUNT; i ++) | ||
{ | ||
//Max unroll size | ||
#pragma GCC unroll 65534 | ||
for (int j = 0 ; j < LOOP_SIZE; j++) | ||
{ | ||
asm("nop"); | ||
} | ||
} | ||
} |
84 changes: 84 additions & 0 deletions
84
.../profiler/test_custom_cycle_count_slow_dispatch/test_custom_cycle_count_slow_dispatch.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include "tt_metal/host_api.hpp" | ||
#include "tt_metal/detail/tt_metal.hpp" | ||
#include "tt_metal/impl/device/device.hpp" | ||
|
||
using namespace tt; | ||
|
||
/**
 * Builds a program that places the custom-cycle-count kernels on every core of the
 * device's compute-with-storage grid and launches it through
 * tt_metal::detail::LaunchProgram.
 *
 * The same data-movement kernel source is registered for both data-movement
 * processors (RISCV_0 and RISCV_1), and the compute variant is registered through a
 * ComputeConfig. All kernels receive LOOP_COUNT and LOOP_SIZE as compile-time defines.
 *
 * @param device      Device whose compute-with-storage grid is targeted.
 * @param loop_count  Value passed to the kernels as the LOOP_COUNT define.
 * @return Always true when the function returns; no failure condition is checked
 *         here, so any failure has to surface as an exception from the called APIs.
 */
bool RunCustomCycle(tt_metal::Device *device, int loop_count)
{
    // Number of nops per outer-loop iteration, passed to the kernels as LOOP_SIZE.
    constexpr int loop_size = 50;

    // The data-movement source is shared by both data-movement processors, so the
    // path is spelled once instead of being duplicated per CreateKernel call.
    constexpr const char *data_movement_kernel_path =
        "tt_metal/programming_examples/profiler/test_custom_cycle_count_slow_dispatch/kernels/custom_cycle_count_slow_dispatch.cpp";
    constexpr const char *compute_kernel_path =
        "tt_metal/programming_examples/profiler/test_custom_cycle_count_slow_dispatch/kernels/custom_cycle_count_compute_slow_dispatch.cpp";

    // Core range spanning the whole compute-with-storage grid, (0, 0) to (x - 1, y - 1).
    CoreCoord compute_with_storage_size = device->compute_with_storage_grid_size();
    CoreCoord start_core = {0, 0};
    CoreCoord end_core = {compute_with_storage_size.x - 1, compute_with_storage_size.y - 1};
    CoreRange all_cores(start_core, end_core);

    tt_metal::Program program = tt_metal::CreateProgram();

    const std::map<std::string, std::string> kernel_defines = {
        {"LOOP_COUNT", std::to_string(loop_count)},
        {"LOOP_SIZE", std::to_string(loop_size)}
    };

    // The returned kernel handles are not needed afterwards, so they are not stored.
    tt_metal::CreateKernel(
        program,
        data_movement_kernel_path,
        all_cores,
        tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .defines = kernel_defines});

    tt_metal::CreateKernel(
        program,
        data_movement_kernel_path,
        all_cores,
        tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default, .defines = kernel_defines});

    // The compute kernel takes no compile-time arguments.
    const std::vector<uint32_t> trisc_kernel_args = {};
    tt_metal::CreateKernel(
        program,
        compute_kernel_path,
        all_cores,
        tt_metal::ComputeConfig{.compile_args = trisc_kernel_args, .defines = kernel_defines});

    tt_metal::detail::LaunchProgram(device, program);

    return true;
}
|
||
int main(int argc, char **argv) { | ||
bool pass = true; | ||
|
||
try { | ||
//////////////////////////////////////////////////////////////////////////// | ||
// Device Setup | ||
//////////////////////////////////////////////////////////////////////////// | ||
int device_id = 0; | ||
tt_metal::Device *device = | ||
tt_metal::CreateDevice(device_id); | ||
|
||
int loop_count = 2000; | ||
pass &= RunCustomCycle(device, loop_count); | ||
|
||
pass &= tt_metal::CloseDevice(device); | ||
|
||
} catch (const std::exception &e) { | ||
pass = false; | ||
// Capture the exception error message | ||
log_error(LogTest, "{}", e.what()); | ||
// Capture system call errors that may have returned from driver/kernel | ||
log_error(LogTest, "System error message: {}", std::strerror(errno)); | ||
} | ||
|
||
if (pass) { | ||
log_info(LogTest, "Test Passed"); | ||
} else { | ||
TT_THROW("Test Failed"); | ||
} | ||
|
||
TT_FATAL(pass); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters