Skip to content

Commit

Permalink
#0: Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-dma committed Dec 23, 2024
1 parent d404720 commit 8149d97
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 36 deletions.
16 changes: 2 additions & 14 deletions tt_metal/impl/dispatch/kernel_config/demux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,13 @@ void DemuxKernel::GenerateStaticConfigs() {
static_config_.test_results_buf_size_bytes = 0;
static_config_.timeout_cycles = 0;

// TODO: Do we need an upstream sem here?
for (int idx = 0; idx < downstream_kernels_.size(); idx++) {
FDKernel* k = downstream_kernels_[idx];
static_config_.remote_tx_queue_id[idx] = 0;
static_config_.remote_tx_network_type[idx] = (uint32_t)DispatchRemoteNetworkType::NOC0;
static_config_.output_depacketize_cb_log_page_size[idx] = dispatch_constants::DISPATCH_BUFFER_LOG_PAGE_SIZE;
// Only connected dispatchers need a semaphore. TODO: can initialize anyways, but this matches previous
// implementation
if (dynamic_cast<DispatchKernel*>(k)) {
static_config_.output_depacketize_local_sem_id[idx] =
tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType());
}
static_config_.output_depacketize_local_sem_id[idx] =
tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType());
static_config_.output_depacketize_remove_header[idx] = 1;
}
}
Expand Down Expand Up @@ -89,9 +84,6 @@ void DemuxKernel::GenerateDependentConfigs() {
dependent_config_.remote_tx_queue_start_addr_words[idx] =
demux_kernel->GetStaticConfig().rx_queue_start_addr_words.value();
dependent_config_.remote_tx_queue_size_words[idx] = 0x1000; // TODO: hard-coded on previous implementation
// Match previous implementation where downstream demux has output_depacketize fields zeroed out. TODO: can
// remove this later
dependent_config_.output_depacketize_downstream_sem_id[idx] = 0;
uint64_t dest_endpoint_output_map;
if (device_->num_hw_cqs() == 1) {
uint32_t dest_map_array[4] = {0, 0, 1, 1}; // TODO: how to set these generically? Currently just
Expand All @@ -107,10 +99,6 @@ void DemuxKernel::GenerateDependentConfigs() {
TT_FATAL(false, "Unexpected kernel type downstream of DEMUX");
}
}
// TODO: this is just to match the previous implementation hard-code, remove later
if (!tt::Cluster::instance().is_galaxy_cluster()) {
dependent_config_.output_depacketize = 0x3;
}
}

void DemuxKernel::CreateKernel() {
Expand Down
4 changes: 2 additions & 2 deletions tt_metal/impl/dispatch/kernel_config/dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ void DispatchKernel::GenerateDependentConfigs() {
TT_ASSERT(downstream_kernels_.size() == 1);
auto prefetch_h_kernel = dynamic_cast<PrefetchKernel*>(downstream_kernels_[0]);
TT_ASSERT(prefetch_h_kernel);
dependent_config_.downstream_logical_core = UNUSED_LOGICAL_CORE_ADJUSTED;
dependent_config_.downstream_s_logical_core = UNUSED_LOGICAL_CORE_ADJUSTED;
dependent_config_.downstream_logical_core = UNUSED_LOGICAL_CORE;
dependent_config_.downstream_s_logical_core = UNUSED_LOGICAL_CORE;
dependent_config_.prefetch_h_noc_xy = tt::tt_metal::hal.noc_xy_encoding(
prefetch_h_kernel->GetVirtualCore().x, prefetch_h_kernel->GetVirtualCore().y);
dependent_config_.prefetch_h_local_downstream_sem_addr =
Expand Down
9 changes: 2 additions & 7 deletions tt_metal/impl/dispatch/kernel_config/eth_router.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,14 @@ void EthRouterKernel::GenerateStaticConfigs() {
dependent_config_.input_packetize_dst_endpoint = {0x0};

static_config_.fwd_vc_count = this->static_config_.vc_count;
uint32_t created_semaphores = 0;
for (int idx = 0; idx < downstream_kernels_.size(); idx++) {
static_config_.output_depacketize_local_sem[idx] =
tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType());
// Forwward VCs are the ones that don't connect to a prefetch
if (auto pk = dynamic_cast<PrefetchKernel*>(downstream_kernels_[idx])) {
static_config_.fwd_vc_count = this->static_config_.fwd_vc_count.value() - 1;
static_config_.output_depacketize_local_sem[idx] = // TODO: to match for now, init one per vc after
tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType());
created_semaphores++;
}
}
if (created_semaphores == 0) { // Just to match previous implementation
tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType());
}

for (int idx = 0; idx < static_config_.vc_count.value(); idx++) {
static_config_.output_depacketize_log_page_size[idx] = dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE;
Expand Down
2 changes: 0 additions & 2 deletions tt_metal/impl/dispatch/kernel_config/fd_kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
#include "tt_metal/impl/dispatch/kernels/packet_queue_ctrl.hpp"

#define UNUSED_LOGICAL_CORE tt_cxy_pair(device_->id(), 0, 0)
// TODO: Just to make match with previous implementation, remove later
#define UNUSED_LOGICAL_CORE_ADJUSTED tt_cxy_pair(servicing_device_id_, 0, 0)
#define UNUSED_SEM_ID 0

typedef struct {
Expand Down
3 changes: 1 addition & 2 deletions tt_metal/impl/dispatch/kernel_config/mux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ void MuxKernel::CreateKernel() {
compile_args[4 + idx] |= (static_config_.remote_rx_network_type[idx].value() & 0xFF) << 24;
}
if (dependent_config_.input_packetize[idx]) {
// Zero out if input packetize not set to match previous implementation. TODO: don't have to do this
if (dependent_config_.input_packetize[idx].value() != 0) {
if (dependent_config_.input_packetize[idx]) {
compile_args[19 + idx] |= (dependent_config_.input_packetize[idx].value() & 0xFF);
compile_args[19 + idx] |= (dependent_config_.input_packetize_log_page_size[idx].value() & 0xFF) << 8;
compile_args[19 + idx] |= (dependent_config_.input_packetize_upstream_sem[idx].value() & 0xFF) << 16;
Expand Down
13 changes: 4 additions & 9 deletions tt_metal/impl/dispatch/kernel_config/prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@ void PrefetchKernel::GenerateStaticConfigs() {
tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType());
static_config_.my_downstream_cb_sem_id = tt::tt_metal::CreateSemaphore(
*program_, logical_core_, static_config_.downstream_cb_pages.value(), GetCoreType());
tt::tt_metal::CreateSemaphore(
*program_, logical_core_, 0, GetCoreType()); // TODO: what is this third semaphore for?
static_config_.cmddat_q_log_page_size = dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE;
static_config_.cmddat_q_blocks = dispatch_constants::PREFETCH_D_BUFFER_BLOCKS;

Expand All @@ -128,6 +126,8 @@ void PrefetchKernel::GenerateStaticConfigs() {
dependent_config_.downstream_cb_base = my_dispatch_constants.dispatch_buffer_base();
static_config_.downstream_cb_log_page_size = dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE;
static_config_.downstream_cb_pages = my_dispatch_constants.dispatch_buffer_pages();
static_config_.my_downstream_cb_sem_id = tt::tt_metal::CreateSemaphore(
*program_, logical_core_, my_dispatch_constants.dispatch_buffer_pages(), GetCoreType());

static_config_.pcie_base = 0;
static_config_.pcie_size = 0;
Expand All @@ -152,11 +152,6 @@ void PrefetchKernel::GenerateStaticConfigs() {
static_config_.cmddat_q_pages = my_dispatch_constants.prefetch_d_buffer_pages();
static_config_.my_upstream_cb_sem_id =
tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType());
static_config_.my_downstream_cb_sem_id = tt::tt_metal::CreateSemaphore(
*program_,
logical_core_,
my_dispatch_constants.dispatch_buffer_pages(),
GetCoreType()); // TODO: this is out of order to match previous implementation
static_config_.cmddat_q_log_page_size = dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE;
static_config_.cmddat_q_blocks = dispatch_constants::PREFETCH_D_BUFFER_BLOCKS;

Expand Down Expand Up @@ -231,15 +226,15 @@ void PrefetchKernel::GenerateDependentConfigs() {
} else if (static_config_.is_h_variant.value()) {
// Upstream, just host so no dispatch core
TT_ASSERT(upstream_kernels_.size() == 0);
dependent_config_.upstream_logical_core = UNUSED_LOGICAL_CORE_ADJUSTED;
dependent_config_.upstream_logical_core = UNUSED_LOGICAL_CORE;
dependent_config_.upstream_cb_sem_id = 0; // Used in prefetch_d only

// Downstream, expect just one ROUTER
TT_ASSERT(downstream_kernels_.size() == 1);
auto router_kernel = dynamic_cast<EthRouterKernel*>(downstream_kernels_[0]);
TT_ASSERT(router_kernel);
dependent_config_.downstream_logical_core = router_kernel->GetLogicalCore();
dependent_config_.downstream_s_logical_core = UNUSED_LOGICAL_CORE_ADJUSTED;
dependent_config_.downstream_s_logical_core = UNUSED_LOGICAL_CORE;
uint32_t router_idx = router_kernel->GetUpstreamPort(this); // Need the port that this connects to downstream
dependent_config_.downstream_cb_base =
(router_kernel->GetStaticConfig().rx_queue_start_addr_words.value() << 4) +
Expand Down

0 comments on commit 8149d97

Please sign in to comment.