From 06713f06f9907d8aa56b1c6e814a0f1a277c84f9 Mon Sep 17 00:00:00 2001 From: Ilkoo Lee Date: Wed, 18 Dec 2024 14:25:59 +0000 Subject: [PATCH] #16134: add test case for pre-allocated CreateBuffer --- tests/tt_metal/tt_metal/api/test_dram.cpp | 66 +++++++++++++++++++ .../unit_tests/gtests/test_async_runtime.cpp | 2 + tt_metal/third_party/pybind11 | 1 + 3 files changed, 69 insertions(+) create mode 160000 tt_metal/third_party/pybind11 diff --git a/tests/tt_metal/tt_metal/api/test_dram.cpp b/tests/tt_metal/tt_metal/api/test_dram.cpp index 293a10a5cafd..bd0e6e475bb9 100644 --- a/tests/tt_metal/tt_metal/api/test_dram.cpp +++ b/tests/tt_metal/tt_metal/api/test_dram.cpp @@ -122,6 +122,54 @@ bool dram_single_core( fixture->ReadBuffer(device, output_dram_buffer, result_vec); return result_vec == src_vec; } + +bool dram_single_core_pre_allocated( + DispatchFixture* fixture, tt_metal::Device* device, const DRAMConfig& cfg, std::vector src_vec) { + // Create a program + tt_metal::Program program = CreateProgram(); + + tt_metal::InterleavedBufferConfig dram_config{ + .device = device, + .size = cfg.dram_buffer_size, + .page_size = cfg.dram_buffer_size, + .buffer_type = tt_metal::BufferType::DRAM}; + + auto input_dram_buffer = tt_metal::CreateBuffer(dram_config); + uint32_t input_dram_buffer_addr = input_dram_buffer->address(); + auto input_dram_pre_allocated_buffer = tt_metal::CreateBuffer(dram_config, input_dram_buffer_addr); + uint32_t input_dram_pre_allocated_buffer_addr = input_dram_pre_allocated_buffer->address(); + + TT_FATAL(input_dram_buffer_addr == input_dram_pre_allocated_buffer_addr, "Error"); + + auto output_dram_buffer = tt_metal::CreateBuffer(dram_config); + uint32_t output_dram_buffer_addr = output_dram_buffer->address(); + auto output_dram_pre_allocated_buffer = tt_metal::CreateBuffer(dram_config, output_dram_buffer_addr); + uint32_t output_dram_pre_allocated_buffer_addr = output_dram_pre_allocated_buffer->address(); + + TT_FATAL(output_dram_buffer_addr == output_dram_pre_allocated_buffer_addr, "Error"); + + // Create the kernel + auto dram_kernel = tt_metal::CreateKernel(program, cfg.kernel_file, cfg.core_range, cfg.data_movement_cfg); + fixture->WriteBuffer(device, input_dram_pre_allocated_buffer, src_vec); + + tt_metal::SetRuntimeArgs( + program, + dram_kernel, + cfg.core_range, + {cfg.l1_buffer_addr, + input_dram_pre_allocated_buffer_addr, + (std::uint32_t)0, + output_dram_pre_allocated_buffer_addr, + (std::uint32_t)0, + cfg.dram_buffer_size}); + + fixture->RunProgram(device, program); + + std::vector result_vec; + fixture->ReadBuffer(device, output_dram_pre_allocated_buffer, result_vec); + + return result_vec == src_vec; +} } // namespace unit_tests_common::dram::test_dram TEST_F(DispatchFixture, TensixDRAMLoopbackSingleCore) { @@ -142,6 +190,24 @@ TEST_F(DispatchFixture, TensixDRAMLoopbackSingleCore) { } } +TEST_F(DispatchFixture, TensixDRAMLoopbackSingleCorePreAllocated) { + uint32_t buffer_size = 2 * 1024 * 25; + std::vector src_vec = + create_random_vector_of_bfloat16(buffer_size, 100, std::chrono::system_clock::now().time_since_epoch().count()); + unit_tests_common::dram::test_dram::DRAMConfig dram_test_config = { + .core_range = {{0, 0}, {0, 0}}, + .kernel_file = "tests/tt_metal/tt_metal/test_kernels/dataflow/dram_copy.cpp", + .dram_buffer_size = buffer_size, + .l1_buffer_addr = 400 * 1024, + .data_movement_cfg = + {.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default}, + }; + for (unsigned int id = 0; id < devices_.size(); id++) { + ASSERT_TRUE(unit_tests_common::dram::test_dram::dram_single_core_pre_allocated( + this, devices_.at(id), dram_test_config, src_vec)); + } +} + TEST_F(DispatchFixture, TensixDRAMLoopbackSingleCoreDB) { if (!this->IsSlowDispatch()) { tt::log_info(tt::LogTest, "This test is only supported in slow dispatch mode"); diff --git a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp index b5495a324dbb..7474f324ccab 100644 --- a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp +++ b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp @@ -74,11 +74,13 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { ttnn::record_event(device_->command_queue(io_cq), write_event); // Host stalls until write is completed, before sending workload ttnn::event_synchronize(write_event); + EXPECT_EQ(ttnn::event_query(write_event), true); // Dispatch workload. Preallocated output_tensor is populated by op/ ttnn::moreh_sum(input_tensor, /*dim*/ 3, false, output_tensor, std::nullopt, std::nullopt); // Record completion of workload ttnn::record_event(device_->command_queue(workload_dispatch_cq), workload_event); ttnn::event_synchronize(workload_event); + EXPECT_EQ(ttnn::event_query(workload_event), true); // Read output back, once workload is complete ttnn::read_buffer(io_cq, output_tensor, {readback_data}); // Ensure that reference count book keeping is done correctly diff --git a/tt_metal/third_party/pybind11 b/tt_metal/third_party/pybind11 new file mode 160000 index 000000000000..b8f28551cc3a --- /dev/null +++ b/tt_metal/third_party/pybind11 @@ -0,0 +1 @@ +Subproject commit b8f28551cc3a98ea9fbfc15c05b513c8f2d23e84