#16134: add test case for pre-allocated CreateBuffer
ilkoo-lee committed Dec 23, 2024
1 parent 05fee29 commit 06713f0
Showing 3 changed files with 69 additions and 0 deletions.
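
The new test exercises the CreateBuffer overload that takes an explicit device address in addition to the usual InterleavedBufferConfig, and checks that the returned buffer lands at exactly that address. Below is a minimal sketch of that pattern, using only the calls visible in the diff; the header path, namespace qualification, and device handle are assumptions for the sketch, not part of this commit.

// Sketch only: the pre-allocated CreateBuffer pattern exercised by the new test.
// Assumed header path and fully qualified namespace; adjust to the repo's conventions.
#include "tt_metal/host_api.hpp"

bool pre_allocated_create_buffer_sketch(tt::tt_metal::Device* device, uint32_t buffer_size) {
    tt::tt_metal::InterleavedBufferConfig dram_config{
        .device = device,
        .size = buffer_size,
        .page_size = buffer_size,
        .buffer_type = tt::tt_metal::BufferType::DRAM};

    // Let the allocator pick an address.
    auto allocated = tt::tt_metal::CreateBuffer(dram_config);
    uint32_t addr = allocated->address();

    // Pre-allocated variant: pass the address explicitly and expect it back unchanged.
    auto pre_allocated = tt::tt_metal::CreateBuffer(dram_config, addr);
    return pre_allocated->address() == addr;
}
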
66 changes: 66 additions & 0 deletions tests/tt_metal/tt_metal/api/test_dram.cpp
@@ -122,6 +122,54 @@ bool dram_single_core(
fixture->ReadBuffer(device, output_dram_buffer, result_vec);
return result_vec == src_vec;
}

bool dram_single_core_pre_allocated(
DispatchFixture* fixture, tt_metal::Device* device, const DRAMConfig& cfg, std::vector<uint32_t> src_vec) {
// Create a program
tt_metal::Program program = CreateProgram();

tt_metal::InterleavedBufferConfig dram_config{
.device = device,
.size = cfg.dram_buffer_size,
.page_size = cfg.dram_buffer_size,
.buffer_type = tt_metal::BufferType::DRAM};

auto input_dram_buffer = tt_metal::CreateBuffer(dram_config);
uint32_t input_dram_buffer_addr = input_dram_buffer->address();
auto input_dram_pre_allocated_buffer = tt_metal::CreateBuffer(dram_config, input_dram_buffer_addr);
uint32_t input_dram_pre_allocated_buffer_addr = input_dram_pre_allocated_buffer->address();

TT_FATAL(input_dram_buffer_addr == input_dram_pre_allocated_buffer_addr, "Error");

auto output_dram_buffer = tt_metal::CreateBuffer(dram_config);
uint32_t output_dram_buffer_addr = output_dram_buffer->address();
auto output_dram_pre_allocated_buffer = tt_metal::CreateBuffer(dram_config, output_dram_buffer_addr);
uint32_t output_dram_pre_allocated_buffer_addr = output_dram_pre_allocated_buffer->address();

TT_FATAL(output_dram_buffer_addr == output_dram_pre_allocated_buffer_addr, "Error");

// Create the kernel
auto dram_kernel = tt_metal::CreateKernel(program, cfg.kernel_file, cfg.core_range, cfg.data_movement_cfg);
fixture->WriteBuffer(device, input_dram_pre_allocated_buffer, src_vec);

tt_metal::SetRuntimeArgs(
program,
dram_kernel,
cfg.core_range,
{cfg.l1_buffer_addr,
input_dram_pre_allocated_buffer_addr,
(std::uint32_t)0,
output_dram_pre_allocated_buffer_addr,
(std::uint32_t)0,
cfg.dram_buffer_size});

fixture->RunProgram(device, program);

std::vector<uint32_t> result_vec;
fixture->ReadBuffer(device, output_dram_pre_allocated_buffer, result_vec);

return result_vec == src_vec;
}
} // namespace unit_tests_common::dram::test_dram

TEST_F(DispatchFixture, TensixDRAMLoopbackSingleCore) {
@@ -142,6 +190,24 @@ TEST_F(DispatchFixture, TensixDRAMLoopbackSingleCore) {
}
}

TEST_F(DispatchFixture, TensixDRAMLoopbackSingleCorePreAllocated) {
uint32_t buffer_size = 2 * 1024 * 25;
std::vector<uint32_t> src_vec =
create_random_vector_of_bfloat16(buffer_size, 100, std::chrono::system_clock::now().time_since_epoch().count());
unit_tests_common::dram::test_dram::DRAMConfig dram_test_config = {
.core_range = {{0, 0}, {0, 0}},
.kernel_file = "tests/tt_metal/tt_metal/test_kernels/dataflow/dram_copy.cpp",
.dram_buffer_size = buffer_size,
.l1_buffer_addr = 400 * 1024,
.data_movement_cfg =
{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default},
};
for (unsigned int id = 0; id < devices_.size(); id++) {
ASSERT_TRUE(unit_tests_common::dram::test_dram::dram_single_core_pre_allocated(
this, devices_.at(id), dram_test_config, src_vec));
}
}

TEST_F(DispatchFixture, TensixDRAMLoopbackSingleCoreDB) {
if (!this->IsSlowDispatch()) {
tt::log_info(tt::LogTest, "This test is only supported in slow dispatch mode");
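
A note on the configuration of the new TensixDRAMLoopbackSingleCorePreAllocated test above: buffer_size is 2 * 1024 * 25 = 51,200 bytes, presumably 25 bfloat16 tiles of 32 * 32 elements * 2 bytes = 2,048 bytes each, and core_range = {{0, 0}, {0, 0}} restricts the kernel to the single Tensix core at (0, 0).
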
2 changes: 2 additions & 0 deletions tests/ttnn/unit_tests/gtests/test_async_runtime.cpp
@@ -74,11 +74,13 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) {
ttnn::record_event(device_->command_queue(io_cq), write_event);
// Host stalls until write is completed, before sending workload
ttnn::event_synchronize(write_event);
EXPECT_EQ(ttnn::event_query(write_event), true);
// Dispatch workload. Preallocated output_tensor is populated by the op.
ttnn::moreh_sum(input_tensor, /*dim*/ 3, false, output_tensor, std::nullopt, std::nullopt);
// Record completion of workload
ttnn::record_event(device_->command_queue(workload_dispatch_cq), workload_event);
ttnn::event_synchronize(workload_event);
EXPECT_EQ(ttnn::event_query(workload_event), true);
// Read output back, once workload is complete
ttnn::read_buffer(io_cq, output_tensor, {readback_data});
// Ensure that reference count book keeping is done correctly
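
The two EXPECT_EQ additions tighten the async test: after ttnn::event_synchronize returns, a non-blocking ttnn::event_query on the same event must also report completion. A minimal sketch of that ordering, restricted to calls already present in the test; write_event, device_, and io_cq are created earlier in the test and are not shown in this hunk.

// Sketch only: record -> synchronize -> query ordering checked by the added assertions.
// write_event, device_, and io_cq come from earlier in the test (outside this hunk).
ttnn::record_event(device_->command_queue(io_cq), write_event);  // enqueue the event on the I/O queue
ttnn::event_synchronize(write_event);                            // host blocks until the event completes
EXPECT_EQ(ttnn::event_query(write_event), true);                 // non-blocking query must now agree
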
1 change: 1 addition & 0 deletions tt_metal/third_party/pybind11
Submodule pybind11 added at b8f285
