Skip to content

Commit

Permalink
updates/fixes after rebase - also apply PR feedback
Browse files (browse the repository at this point in the history)
  • Loading branch information
SeanNijjar committed Dec 21, 2024
1 parent 96ba16e commit 20416bd
Show file tree
Hide file tree
Showing 20 changed files with 130 additions and 229 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,8 +64,6 @@ void kernel_main() {
auto worker_buffer_index_semaphore_addr = get_semaphore(get_arg_val<uint32_t>(arg_idx++));
bool connected_to_persistent_fabric = get_arg_val<uint32_t>(arg_idx++) != 0;

DPRINT << "worker_buffer_index_semaphore_addr: " << (uint32_t)worker_buffer_index_semaphore_addr << "\n";
DPRINT << "connected_to_persistent_fabric: " << (uint32_t)connected_to_persistent_fabric << "\n";
// TODO: move to semaphore
auto edm_buffer_index_sem_id = get_arg_val<uint32_t>(arg_idx++);
ASSERT(edm_buffer_index_sem_id < 8);
Expand All @@ -80,7 +78,6 @@ void kernel_main() {
} else {
config.unicast.distance = static_cast<uint8_t>(get_arg_val<uint32_t>(arg_idx++));
}
DPRINT << "config.unicast.distance: " << (uint32_t)config.unicast.distance << "\n";

const InterleavedAddrGen<dest_is_dram> dest_addr_gen = {
.bank_base_address = dest_addr, .page_size = page_size};
Expand All @@ -101,9 +98,7 @@ void kernel_main() {
writer_send_sem_addr,
worker_buffer_index_semaphore_addr);

DPRINT << "sender open\n";
sender.open();
DPRINT << "opened\n";

constexpr uint32_t cb_id_in0 = tt::CBIndex::c_0;

Expand All @@ -116,10 +111,8 @@ void kernel_main() {
uint32_t buffer_index = 0;
cb_wait_front(cb_id_in0, 1);
auto a_packet_header_addr = get_read_ptr(cb_id_in0);
DPRINT << "total_pages_to_send: " << (uint32_t)total_pages_to_send << "\n";
for (uint32_t p = 0; p < total_pages_to_send; p += num_pages_per_send) {
uint32_t pages_to_send = std::min<uint32_t>(num_pages_per_send, total_pages_to_send - p);
DPRINT << "wait empty write slot\n";
sender.wait_for_empty_write_slot();
cb_wait_front(cb_id_in0, pages_to_send);

Expand Down Expand Up @@ -156,22 +149,15 @@ void kernel_main() {
uint64_t buffer_address = sender.edm_buffer_addr +
(*sender.buffer_index_ptr * (sender.buffer_size_bytes + sizeof(eth_channel_sync_t)));
sender.send_payload_blocking_from_address(packet_addr, packet_size);
DPRINT << "noc write barrier\n";
noc_async_writes_flushed();
DPRINT << "cb pop front\n";
cb_pop_front(cb_id_in0, pages_to_send);
DPRINT << "cb pop front done\n";
}

DPRINT << "DONE MAIN LOOP\n";
if constexpr (!mcast_mode) {
DPRINT << "TEARDOWN\n";
sender.wait_for_empty_write_slot();

auto& packet_header = *reinterpret_cast<tt::fabric::PacketHeader*>(a_packet_header_addr);
ASSERT(*last_message_semaphore_address == 0);
packet_header.reserved = 0xE;
packet_header.reserved2 = 0xFFFF;
packet_header.to_atomic_inc();
packet_header.to_chip_unicast(tt::fabric::UnicastRoutingCommandHeader{2});
packet_header.to_noc_unicast_atomic_inc(tt::fabric::NocUnicastAtomicIncCommandHeader(
Expand All @@ -180,16 +166,12 @@ void kernel_main() {
sender.send_payload_blocking_from_address(
a_packet_header_addr, packet_header.get_payload_size_including_header());

DPRINT << "WAITING FOR COMPLETION LOOPBACK @: " << (uint32_t)last_message_semaphore_address << "\n";
noc_semaphore_wait(last_message_semaphore_address, 1);
DPRINT << "\t got it!\n";
}

bool closed_fabric_connection = terminate_fabric_endpoints_farthest_to_nearest(sender, a_packet_header_addr, arg_idx);

if (!closed_fabric_connection) {
DPRINT << "CLOSE\n";
sender.close();
}
DPRINT << "DONE\n";
}
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/fabric_edm_packet_header.hpp"
#include "ttnn/cpp/ttnn/operations/ccl/kernels/edm_fabric/edm_fabric_worker_adapters.hpp"
#include "tests/ttnn/unit_tests/gtests/ccl/kernels/test_kernels.common.hpp"
#include "ttnn/cpp/ttnn/operations/ccl/common/kernels/ccl_send_utils.hpp"

struct unicast_mode {
uint8_t distance;
Expand Down Expand Up @@ -51,9 +52,7 @@ auto forward_to_fabric_from_cb(

// bit of a hack to extract X/Y
const auto dest_noc_address = get_noc_addr(current_page, dest_addr_gen, 0, NORMALIZED_NOC_INDEX);
const size_t dest_addr = dest_noc_address & 0xFFFFFFFF;
const size_t dest_noc_x = (dest_noc_address >> NOC_ADDR_LOCAL_BITS) & ((1 << NOC_ADDR_NODE_ID_BITS) - 1);
const size_t dest_noc_y = (dest_noc_address >> (NOC_ADDR_LOCAL_BITS + NOC_ADDR_NODE_ID_BITS)) & ((1 << NOC_ADDR_NODE_ID_BITS) - 1);
const auto [dest_worker_noc, dest_addr] = get_noc_address_components(dest_noc_address);
const size_t packet_size = page_size + sizeof(tt::fabric::PacketHeader);

auto packet_addr = get_read_ptr(cb_id);
Expand All @@ -64,20 +63,16 @@ auto forward_to_fabric_from_cb(
.to_noc_unicast(tt::fabric::NocUnicastCommandHeader{
dest_addr,
(pages_to_send * page_size) + sizeof(tt::fabric::PacketHeader),
static_cast<uint8_t>(dest_noc_x),
static_cast<uint8_t>(dest_noc_y)
});
packet_header.reserved2 = 0x1111; // debug only
static_cast<uint8_t>(dest_worker_noc.x),
static_cast<uint8_t>(dest_worker_noc.y)});
} else {
packet_header.to_write()
.to_chip_unicast(tt::fabric::UnicastRoutingCommandHeader{config.unicast.distance})
.to_noc_unicast(tt::fabric::NocUnicastCommandHeader{
dest_addr,
(pages_to_send * page_size) + sizeof(tt::fabric::PacketHeader),
static_cast<uint8_t>(dest_noc_x),
static_cast<uint8_t>(dest_noc_y)
});
packet_header.reserved2 = 0x1111; // debug only
static_cast<uint8_t>(dest_worker_noc.x),
static_cast<uint8_t>(dest_worker_noc.y)});
}

uint64_t buffer_address = sender.edm_buffer_addr + (*sender.buffer_index_ptr * (sender.buffer_size_bytes + sizeof(eth_channel_sync_t)));
Expand Down Expand Up @@ -196,12 +191,13 @@ void kernel_main() {

sender.wait_for_empty_write_slot();

constexpr size_t kLoopbackNumHopsToMyChip = 2;
auto &packet_header = *reinterpret_cast<tt::fabric::PacketHeader*>(a_packet_header_addr);
ASSERT(*last_message_semaphore_address == 0);
packet_header.reserved = 0xE;
packet_header.reserved2 = 0xFFFF;
packet_header.to_atomic_inc();
packet_header.to_chip_unicast(tt::fabric::UnicastRoutingCommandHeader{2});
packet_header.to_chip_unicast(tt::fabric::UnicastRoutingCommandHeader{kLoopbackNumHopsToMyChip});
packet_header.to_noc_unicast_atomic_inc(tt::fabric::NocUnicastAtomicIncCommandHeader(
reinterpret_cast<size_t>(last_message_semaphore_address),
1,
Expand Down
Loading

0 comments on commit 20416bd

Please sign in to comment.