diff --git a/tests/scripts/run_cpp_fd2_tests.sh b/tests/scripts/run_cpp_fd2_tests.sh index 35dc02fd5904..ca77ab1d7e65 100755 --- a/tests/scripts/run_cpp_fd2_tests.sh +++ b/tests/scripts/run_cpp_fd2_tests.sh @@ -20,18 +20,18 @@ fi ############################################# echo "Running test_prefetcher tests now..."; -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 0 -i 3 # TrueSmoke Test -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 1 -i 3 # Smoke Test -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 2 -i 3 # Random Test -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 3 -i 3 # PCIE Test -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 3 # Paged DRAM Read Test -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 5 -i 3 # Paged DRAM Write + Read Test -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 6 -i 3 # Host Test +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 0 -i 3 # TrueSmoke Test +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 1 -i 3 # Smoke Test +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 2 -i 3 # Random Test +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 3 -i 3 # PCIE Test +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 3 # Paged DRAM Read Test +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 5 -i 3 # Paged DRAM Write + Read Test +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 6 -i 3 # Host Test # Testcase: Paged Write Cmd to DRAM. 256 pages, 224b size. 
-TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 1 -dpgs 224 -dpgr 256 +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 1 -dpgs 224 -dpgr 256 # Testcase: Paged Write Cmd to DRAM. 120 pages, 64b size. -TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 1 -dpgs 64 -dpgr 120 +./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 4 -i 1 -dpgs 64 -dpgr 120 ############################################# # TEST_DISPATCHER TESTS # @@ -56,8 +56,8 @@ echo "Running test_dispatcher tests now..."; # Testcase: Arbitrary non-even numbers. This caught some test issues with overflowing start_page one test implementation. ./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -i 1 -w 0 -t 2 -min 16 -max 16 -lps 5 -pbs 275 -np 13 ./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -i 1 -w 0 -t 3 -min 16 -max 16 -lps 5 -pbs 275 -np 13 -# 11.7 GB/s whb0 - DRAM. Have to reduce number of pages to not exceed 1MB L1 for GS. -./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 1000 -t 2 -min 8192 -max 8192 -lps 13 -pbs 2 -np 100 -i 1000 +# 11.885 GB/s whb0 - DRAM. Have to reduce number of pages to not exceed 1MB L1 for GS. Also, number of pages per block. 
+./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 10 -t 2 -min 8192 -max 8192 -lps 13 -pbs 2 -np 100 -i 1 -pi 5000 -bs 24 # Packed Write ./build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -i 3 -w 5 -t 4 -min 256 -max 256 diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/run_paged_tests.sh b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/run_paged_tests.sh index 8922dfa30c83..8c2496f476ad 100755 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/run_paged_tests.sh +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/run_paged_tests.sh @@ -141,6 +141,5 @@ TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microben # 9.600 GB/s whb0 TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 10 -t 2 -wx 0 -wy 1 -min 4096 -max 4096 -lps 12 -pbs 2 -np 128 -c -i 1 -pi 10000 |& tee ${DIR}/perf_write_128_page_4096b_size_dispatch_buffer_4096b_pages_10000_iter_dram_pbs2.log -# 11.7 GB/s whb0 -TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 1000 -t 2 -wx 0 -wy 1 -min 8192 -max 8192 -lps 13 -pbs 2 -np 128 -c -i 1000 |& tee ${DIR}/perf_write_128_page_8192b_size_dispatch_buffer_8192b_pages_1000_iter_dram_pbs2.log -# FIXME Hangs TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 10 -t 2 -wx 0 -wy 1 -min 8192 -max 8192 -lps 13 -pbs 2 -np 128 -c -i 1 -pi 10000 |& tee ${DIR}/perf_write_128_page_8192b_size_dispatch_buffer_8192b_pages_10000_iter_dram_pbs2.log +# 11.872 GB/s whb0 - reduced number of pages per block in half otherwise uses 1536 KB L1 (exceeds for GS, WH) +TT_METAL_SLOW_DISPATCH_MODE=1 ${TT_METAL_HOME}/build/test/tt_metal/perf_microbenchmark/dispatch/test_dispatcher -w 10 -t 2 -wx 0 -wy 1 -min 8192 -max 8192 -lps 13 -pbs 2 -np 128 -c -i 1 -pi 5000 -bs 24 |& tee 
${DIR}/perf_write_128_page_8192b_size_dispatch_buffer_8192b_pages_10000_iter_dram_pbs2.log diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp index d05f406da2c6..5da4ec490c47 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_dispatcher.cpp @@ -111,6 +111,8 @@ void init(int argc, char **argv) { dispatch_buffer_block_size_pages_g = test_args::get_command_option_uint32(input_args, "-bs", DEFAULT_DISPATCH_BUFFER_BLOCK_SIZE_PAGES); dispatch_buffer_size_blocks_g = test_args::get_command_option_uint32(input_args, "-b", DEFAULT_DISPATCH_BUFFER_SIZE_BLOCKS); dispatch_buffer_size_g = dispatch_buffer_page_size_g * dispatch_buffer_block_size_pages_g * dispatch_buffer_size_blocks_g; + log_info(tt::LogTest, "Computed dispatch_buffer_size_g: {} from page_size: {} block_size_pages: {} blocks: {}", + dispatch_buffer_size_g, dispatch_buffer_page_size_g, dispatch_buffer_block_size_pages_g, dispatch_buffer_size_blocks_g); prefetcher_page_batch_size_g = test_args::get_command_option_uint32(input_args, "-ppbs", DEFAULT_PREFETCHER_PAGE_BATCH_SIZE); @@ -119,6 +121,8 @@ void init(int argc, char **argv) { // divide the batch size evenlly, one page for terminate pbs_pages = pbs_pages / prefetcher_page_batch_size_g * prefetcher_page_batch_size_g + terminate_cmd_pages; prefetcher_buffer_size_g = pbs_pages * dispatch_buffer_page_size_g; + log_info(tt::LogTest, "Computed prefetcher_buffer_size_g: {} from page_size: {} prefetch_buffer_pages: {}", + prefetcher_buffer_size_g, dispatch_buffer_page_size_g, pbs_pages); max_xfer_size_bytes_g = test_args::get_command_option_uint32(input_args, "-max", max_xfer_size_bytes_g); min_xfer_size_bytes_g = test_args::get_command_option_uint32(input_args, "-min", min_xfer_size_bytes_g); @@ -355,8 +359,15 @@ int main(int argc, char **argv) { // Want 
different buffers on each core, instead use big buffer and self-manage it uint32_t l1_buf_base = align(DISPATCH_L1_UNRESERVED_BASE, dispatch_buffer_page_size_g); TT_ASSERT((l1_buf_base & (dispatch_buffer_page_size_g - 1)) == 0); - if (prefetcher_buffer_size_g + l1_buf_base > 1024 * 1024) { - log_fatal(LogTest, "Error, prefetcher buffer size too large\n"); + + // Make sure user doesn't exceed available L1 space with cmd line arguments. + auto &soc_desc = tt::Cluster::instance().get_soc_desc(device->id()); + if (prefetcher_buffer_size_g + l1_buf_base > soc_desc.worker_l1_size) { + log_fatal(LogTest, "Prefetcher buffer size too large. {} exceeds l1_worker_size: {}", prefetcher_buffer_size_g, soc_desc.worker_l1_size); + exit(-1); + } + if (dispatch_buffer_size_g + l1_buf_base > soc_desc.worker_l1_size) { + log_fatal(LogTest, "Dispatcher buffer size too large. {} exceeds l1_worker_size: {}", dispatch_buffer_size_g, soc_desc.worker_l1_size); exit(-1); }