Skip to content

Commit

Permalink
#5480: Add test dispatch buffer overflow assert, and update test cmd …
Browse files Browse the repository at this point in the history
…line to avoid overflow

 - Generalize the prefetcher buffer overflow assert (use the soc desc), and
   add one for the dispatch buffer too.  My 8KB page size paged-write unit
   test was unknowingly overflowing L1; this assert catches it.

 - Adjust the test cmd line to cut the pages per dispatch buffer block in
   half (48 -> 24), keeping the dispatch buffer size at 768KB. This fixes
   the newly added assert, as well as the hangs and mismatches seen in
   various versions of this test.
  • Loading branch information
kmabeeTT committed Apr 3, 2024
1 parent b0567d7 commit ab2ced7
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 16 deletions.
22 changes: 11 additions & 11 deletions tests/scripts/run_cpp_fd2_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ fi
#############################################
echo "Running test_prefetcher tests now...";

TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 0 -i 3 # TrueSmoke Test
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 1 -i 3 # Smoke Test
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 2 -i 3 # Random Test
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 3 -i 3 # PCIE Test
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 3 # Paged DRAM Read Test
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 5 -i 3 # Paged DRAM Write + Read Test
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 6 -i 3 # Host Test
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 0 -i 3 # TrueSmoke Test
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 1 -i 3 # Smoke Test
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 2 -i 3 # Random Test
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 3 -i 3 # PCIE Test
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 3 # Paged DRAM Read Test
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 5 -i 3 # Paged DRAM Write + Read Test
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 6 -i 3 # Host Test

# Testcase: Paged Write Cmd to DRAM. 256 pages, 224b size.
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 1 -dpgs 224 -dpgr 256
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 1 -dpgs 224 -dpgr 256
# Testcase: Paged Write Cmd to DRAM. 120 pages, 64b size.
TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 1 -dpgs 64 -dpgr 120
./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 1 -dpgs 64 -dpgr 120

#############################################
# TEST_DISPATCHER TESTS #
Expand All @@ -56,8 +56,8 @@ echo "Running test_dispatcher tests now...";
# Testcase: Arbitrary non-even numbers. This caught some test issues with overflowing start_page in one test implementation.
./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -i 1 -w 0 -t 2 -min 16 -max 16 -lps 5 -pbs 275 -np 13
./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -i 1 -w 0 -t 3 -min 16 -max 16 -lps 5 -pbs 275 -np 13
# 11.7 GB/s whb0 - DRAM. Have to reduce number of pages to not exceed 1MB L1 for GS.
./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 1000 -t 2 -min 8192 -max 8192 -lps 13 -pbs 2 -np 100 -i 1000
# 11.885 GB/s whb0 - DRAM. Have to reduce number of pages to not exceed 1MB L1 for GS. Also, number of pages per block.
./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 10 -t 2 -min 8192 -max 8192 -lps 13 -pbs 2 -np 100 -i 1 -pi 5000 -bs 24

# Packed Write
./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -i 3 -w 5 -t 4 -min 256 -max 256
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,5 @@ TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microben
# 9.600 GB/s whb0
TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 10 -t 2 -wx 0 -wy 1 -min 4096 -max 4096 -lps 12 -pbs 2 -np 128 -c -i 1 -pi 10000 |& tee ${DIR}/perf_write_128_page_4096b_size_dispatch_buffer_4096b_pages_10000_iter_dram_pbs2.log

# 11.7 GB/s whb0
TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 1000 -t 2 -wx 0 -wy 1 -min 8192 -max 8192 -lps 13 -pbs 2 -np 128 -c -i 1000 |& tee ${DIR}/perf_write_128_page_8192b_size_dispatch_buffer_8192b_pages_1000_iter_dram_pbs2.log
# FIXME Hangs TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 10 -t 2 -wx 0 -wy 1 -min 8192 -max 8192 -lps 13 -pbs 2 -np 128 -c -i 1 -pi 10000 |& tee ${DIR}/perf_write_128_page_8192b_size_dispatch_buffer_8192b_pages_10000_iter_dram_pbs2.log
# 11.872 GB/s whb0 - reduced number of pages per block in half otherwise uses 1536 KB L1 (exceeds for GS, WH)
TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 10 -t 2 -wx 0 -wy 1 -min 8192 -max 8192 -lps 13 -pbs 2 -np 128 -c -i 1 -pi 5000 -bs 24 |& tee ${DIR}/perf_write_128_page_8192b_size_dispatch_buffer_8192b_pages_10000_iter_dram_pbs2.log
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ void init(int argc, char **argv) {
dispatch_buffer_block_size_pages_g = test_args::get_command_option_uint32(input_args, "-bs", DEFAULT_DISPATCH_BUFFER_BLOCK_SIZE_PAGES);
dispatch_buffer_size_blocks_g = test_args::get_command_option_uint32(input_args, "-b", DEFAULT_DISPATCH_BUFFER_SIZE_BLOCKS);
dispatch_buffer_size_g = dispatch_buffer_page_size_g * dispatch_buffer_block_size_pages_g * dispatch_buffer_size_blocks_g;
log_info(tt::LogTest, "Computed dispatch_buffer_size_g: {} from page_size: {} block_size_pages: {} blocks: {}",
dispatch_buffer_size_g, dispatch_buffer_page_size_g, dispatch_buffer_block_size_pages_g, dispatch_buffer_size_blocks_g);

prefetcher_page_batch_size_g = test_args::get_command_option_uint32(input_args, "-ppbs", DEFAULT_PREFETCHER_PAGE_BATCH_SIZE);

Expand All @@ -119,6 +121,8 @@ void init(int argc, char **argv) {
// divide the batch size evenly, one page for terminate
pbs_pages = pbs_pages / prefetcher_page_batch_size_g * prefetcher_page_batch_size_g + terminate_cmd_pages;
prefetcher_buffer_size_g = pbs_pages * dispatch_buffer_page_size_g;
log_info(tt::LogTest, "Computed prefetcher_buffer_size_g: {} from page_size: {} prefetch_buffer_pages: {}",
prefetcher_buffer_size_g, dispatch_buffer_page_size_g, pbs_pages);

max_xfer_size_bytes_g = test_args::get_command_option_uint32(input_args, "-max", max_xfer_size_bytes_g);
min_xfer_size_bytes_g = test_args::get_command_option_uint32(input_args, "-min", min_xfer_size_bytes_g);
Expand Down Expand Up @@ -355,8 +359,15 @@ int main(int argc, char **argv) {
// Want different buffers on each core, instead use big buffer and self-manage it
uint32_t l1_buf_base = align(DISPATCH_L1_UNRESERVED_BASE, dispatch_buffer_page_size_g);
TT_ASSERT((l1_buf_base & (dispatch_buffer_page_size_g - 1)) == 0);
if (prefetcher_buffer_size_g + l1_buf_base > 1024 * 1024) {
log_fatal(LogTest, "Error, prefetcher buffer size too large\n");

// Make sure user doesn't exceed available L1 space with cmd line arguments.
auto &soc_desc = tt::Cluster::instance().get_soc_desc(device->id());
if (prefetcher_buffer_size_g + l1_buf_base > soc_desc.worker_l1_size) {
log_fatal(LogTest, "Prefetcher buffer size too large. {} exceeds l1_worker_size: {}", dispatch_buffer_size_g, soc_desc.worker_l1_size);
exit(-1);
}
if (dispatch_buffer_size_g + l1_buf_base > soc_desc.worker_l1_size) {
log_fatal(LogTest, "Dispatcher buffer size too large. {} exceeds l1_worker_size: {}", dispatch_buffer_size_g, soc_desc.worker_l1_size);
exit(-1);
}

Expand Down

0 comments on commit ab2ced7

Please sign in to comment.