Skip to content

Commit

Permalink
#5830: Fix test checks to conform to updated FD2 EWB, fix a trace buffer free and enqueue race
Browse files Browse the repository at this point in the history
  • Loading branch information
tooniz committed Apr 3, 2024
1 parent a36a2da commit 060bf05
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,26 @@ TEST_F(CommandQueueFixture, InstantiateTraceSanity) {
EXPECT_EQ(data_fd, data_bd);

// Check for content correctness in the trace buffer
CQPrefetchCmd* p_cmd = (CQPrefetchCmd*)(data_fd.data());
// The following commands are expected based on the trace capture
CQPrefetchCmd* p_cmd;
CQDispatchCmd* d_cmd;
size_t p_size = (sizeof(CQPrefetchCmd) / sizeof(uint32_t));
size_t d_size = (sizeof(CQDispatchCmd) / sizeof(uint32_t));
size_t offset = 0;
p_cmd = (CQPrefetchCmd*)(data_fd.data() + offset);
offset += p_size;
EXPECT_EQ(p_cmd->base.cmd_id, CQ_PREFETCH_CMD_RELAY_INLINE);
CQDispatchCmd* d_cmd = (CQDispatchCmd*)(data_fd.data() + (sizeof(CQPrefetchCmd) / sizeof(uint32_t)));

d_cmd = (CQDispatchCmd*)(data_fd.data() + offset);
offset += d_size;
EXPECT_EQ(d_cmd->base.cmd_id, CQ_DISPATCH_CMD_WAIT);

p_cmd = (CQPrefetchCmd*)(data_fd.data() + offset);
offset += p_size;
EXPECT_EQ(p_cmd->base.cmd_id, CQ_PREFETCH_CMD_RELAY_INLINE);

d_cmd = (CQDispatchCmd*)(data_fd.data() + offset);
offset += d_size;
EXPECT_EQ(d_cmd->base.cmd_id, CQ_DISPATCH_CMD_WRITE_PAGED);
EXPECT_EQ(d_cmd->write_paged.is_dram, true);
EXPECT_EQ(d_cmd->write_paged.page_size, 2048);
Expand Down
8 changes: 0 additions & 8 deletions tt_metal/impl/dispatch/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,13 +715,6 @@ void HWCommandQueue::enqueue_write_buffer(const Buffer& buffer, const void* src,
}
}

log_info(
LogMetalTrace,
"DEBUG completion q blocking={}, expected={}, completed={}",
blocking,
this->num_entries_in_completion_q,
this->num_completed_completion_q_reads);

if (blocking) {
this->finish();
} else {
Expand Down Expand Up @@ -1289,7 +1282,6 @@ void EndTrace(Trace& trace) {

// Instantiates `trace` on the command queue `cq` by calling trace.instantiate(cq)
// and returns the trace id produced by that call.
// NOTE(review): this listing is a rendered diff without +/- markers; the hunk
// summary for this file reports only deletions, which suggests the Finish(cq)
// call below is being removed by this commit -- confirm against the repository.
uint32_t InstantiateTrace(Trace& trace, CommandQueue& cq) {
uint32_t trace_id = trace.instantiate(cq);
Finish(cq);
return trace_id;
}

Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/dispatch/command_queue_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ class SystemMemoryManager {
cq_interface.issue_fifo_limit = (CQ_START + cq_interface.offset + issue_queue_size) >> 4;
}

void set_bypass_mode(const bool enable, const bool clear=true) {
void set_bypass_mode(const bool enable, const bool clear) {
this->bypass_enable = enable;
if (clear) {
this->bypass_buffer.clear();
Expand Down
33 changes: 16 additions & 17 deletions tt_metal/impl/trace/trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
#include "tt_metal/host_api.hpp"
#include "tt_metal/impl/dispatch/command_queue.hpp"

namespace tt::tt_metal {
namespace {
// File-local named boolean flags. They are passed in place of bare true/false
// literals so that call sites state their intent (e.g. kBlocking for a blocking
// enqueue, kEnableCQBypass / kDisableCQBypass for toggling bypass mode).
// The unnamed namespace already gives these constants internal linkage, so an
// additional `static` specifier would be redundant.
constexpr bool kBlocking = true;
constexpr bool kNonBlocking = false;
constexpr bool kEnableCQBypass = true;
constexpr bool kDisableCQBypass = false;
}  // namespace

// List of supported commands for tracing
// const unordered_set<EnqueueCommandType> trace_supported_commands = {
// EnqueueCommandType::ENQUEUE_PROGRAM,
// };
namespace tt::tt_metal {

unordered_map<uint32_t, shared_ptr<Buffer>> Trace::buffer_pool;
std::mutex Trace::pool_mutex;
Expand All @@ -41,11 +44,8 @@ void Trace::validate() {
if (cmd.blocking.has_value()) {
// The workload being traced needs to be self-contained and not require any host interaction
// Blocking by definition yields control back to the host, consider breaking it into multiple traces
TT_FATAL(cmd.blocking.value() == false, "Blocking commands are not supported in traces");
TT_FATAL(cmd.blocking.value() == false, "Only non-blocking commands can be captured in Metal Trace!");
}
// if (trace_supported_commands.find(cmd.type) == trace_supported_commands.end()) {
// TT_THROW("Unsupported command type for tracing");
// }
}
}

Expand All @@ -62,37 +62,36 @@ uint32_t Trace::instantiate(CommandQueue& cq) {

// Record the captured Host API as commands via bypass mode
SystemMemoryManager& cq_manager = cq.device()->sysmem_manager();
cq_manager.set_bypass_mode(kEnableCQBypass, kClearBuffer);
cq_manager.set_bypass_mode(kEnableCQBypass, true /*clear buffer*/);
for (auto cmd : this->queue().worker_queue) {
log_debug(LogMetalTrace, "Trace::instantiate found command {}", cmd.type);
// cmd.blocking = kNonBlocking; // skip the blocking check for bypass mode
cq.run_command(cmd);
}
cq.wait_until_empty();
cq_manager.set_bypass_mode(kDisableCQBypass, false);

// Extract the data from the bypass buffer and allocate it into a DRAM buffer
SystemMemoryManager& manager = cq.hw_command_queue().manager;
std::vector<uint32_t>& data = cq_manager.get_bypass_data();
uint64_t data_size = data.size() * sizeof(uint32_t);

// TODO: add CQ_PREFETCH_EXEC_BUF_END command and pad to the next page

size_t numel_page = DeviceCommand::PROGRAM_PAGE_SIZE / sizeof(uint32_t);
size_t numel_padding = numel_page - data.size() % numel_page;
if (numel_padding > 0) {
data.resize(data.size() + numel_padding, 0);
data.resize(data.size() + numel_padding, 0/*padding value*/);
}
log_debug(LogMetalTrace, "Trace buffer size = {}, padded size = {}, data = {}", data_size, data.size() * sizeof(uint32_t), data);
log_trace(LogMetalTrace, "Trace buffer size = {}, padded size = {}, data = {}", data_size, data.size() * sizeof(uint32_t), data);

// Commit the trace buffer to device DRAM in a blocking fashion before clearing the bypass mode and data
// Commit the trace buffer to device DRAM
auto buffer = std::make_shared<Buffer>(
cq.device(),
data.size() * sizeof(uint32_t),
DeviceCommand::PROGRAM_PAGE_SIZE,
BufferType::DRAM,
TensorMemoryLayout::INTERLEAVED);
cq_manager.set_bypass_mode(kDisableCQBypass, kClearBuffer);

EnqueueWriteBuffer(cq, buffer, data, kBlocking);
Finish(cq); // clear side effects flag

// Pin the trace buffer in memory until explicitly released by the user
this->add_instance(tid, buffer);
Expand Down
9 changes: 0 additions & 9 deletions tt_metal/impl/trace/trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,6 @@ class Trace {
friend class EnqueueProgramCommand;
friend void EnqueueTrace(CommandQueue& cq, uint32_t tid, bool blocking);

// Labels to make the code more readable
static constexpr bool kClearBuffer = true;
static constexpr bool kBlocking = true;
static constexpr bool kNonBlocking = false;
static constexpr bool kEnableCQBypass = true;
static constexpr bool kDisableCQBypass = false;

TraceState state;

// Trace queue used to capture commands
Expand Down Expand Up @@ -85,8 +78,6 @@ class Trace {
void begin_capture();
void end_capture();
void validate();
// Reports whether the trace state is anything other than TraceState::EMPTY.
bool captured() { return this->state != TraceState::EMPTY; }
// Reports whether the trace state is currently TraceState::INSTANTIATING.
bool instantiating() { return this->state == TraceState::INSTANTIATING; }

// Thread-safe accessors to manage trace instances
static bool has_instance(const uint32_t tid);
Expand Down

0 comments on commit 060bf05

Please sign in to comment.