diff --git a/tt_metal/impl/dispatch/kernel_config/demux.cpp b/tt_metal/impl/dispatch/kernel_config/demux.cpp index aa82eda1f467..1a34bf1c6fd7 100644 --- a/tt_metal/impl/dispatch/kernel_config/demux.cpp +++ b/tt_metal/impl/dispatch/kernel_config/demux.cpp @@ -24,18 +24,13 @@ void DemuxKernel::GenerateStaticConfigs() { static_config_.test_results_buf_size_bytes = 0; static_config_.timeout_cycles = 0; - // TODO: Do we need an upstream sem here? for (int idx = 0; idx < downstream_kernels_.size(); idx++) { FDKernel* k = downstream_kernels_[idx]; static_config_.remote_tx_queue_id[idx] = 0; static_config_.remote_tx_network_type[idx] = (uint32_t)DispatchRemoteNetworkType::NOC0; static_config_.output_depacketize_cb_log_page_size[idx] = dispatch_constants::DISPATCH_BUFFER_LOG_PAGE_SIZE; - // Only connected dispatchers need a semaphore. TODO: can initialize anyways, but this matches previous - // implementation - if (dynamic_cast(k)) { - static_config_.output_depacketize_local_sem_id[idx] = - tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType()); - } + static_config_.output_depacketize_local_sem_id[idx] = + tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType()); static_config_.output_depacketize_remove_header[idx] = 1; } } @@ -89,9 +84,6 @@ void DemuxKernel::GenerateDependentConfigs() { dependent_config_.remote_tx_queue_start_addr_words[idx] = demux_kernel->GetStaticConfig().rx_queue_start_addr_words.value(); dependent_config_.remote_tx_queue_size_words[idx] = 0x1000; // TODO: hard-coded on previous implementation - // Match previous implementation where downstream demux has output_depacketize fields zeroed out. TODO: can - // remove this later - dependent_config_.output_depacketize_downstream_sem_id[idx] = 0; uint64_t dest_endpoint_output_map; if (device_->num_hw_cqs() == 1) { uint32_t dest_map_array[4] = {0, 0, 1, 1}; // TODO: how to set these generically? Currently just @@ -107,10 +99,6 @@ void DemuxKernel::GenerateDependentConfigs() { TT_FATAL(false, "Unexpected kernel type downstream of DEMUX"); } } - // TODO: this is just to match the previous implementation hard-code, remove later - if (!tt::Cluster::instance().is_galaxy_cluster()) { - dependent_config_.output_depacketize = 0x3; - } } void DemuxKernel::CreateKernel() { diff --git a/tt_metal/impl/dispatch/kernel_config/dispatch.cpp b/tt_metal/impl/dispatch/kernel_config/dispatch.cpp index c964ab5581d2..b7f764134df8 100644 --- a/tt_metal/impl/dispatch/kernel_config/dispatch.cpp +++ b/tt_metal/impl/dispatch/kernel_config/dispatch.cpp @@ -220,8 +220,8 @@ void DispatchKernel::GenerateDependentConfigs() { TT_ASSERT(downstream_kernels_.size() == 1); auto prefetch_h_kernel = dynamic_cast(downstream_kernels_[0]); TT_ASSERT(prefetch_h_kernel); - dependent_config_.downstream_logical_core = UNUSED_LOGICAL_CORE_ADJUSTED; - dependent_config_.downstream_s_logical_core = UNUSED_LOGICAL_CORE_ADJUSTED; + dependent_config_.downstream_logical_core = UNUSED_LOGICAL_CORE; + dependent_config_.downstream_s_logical_core = UNUSED_LOGICAL_CORE; dependent_config_.prefetch_h_noc_xy = tt::tt_metal::hal.noc_xy_encoding( prefetch_h_kernel->GetVirtualCore().x, prefetch_h_kernel->GetVirtualCore().y); dependent_config_.prefetch_h_local_downstream_sem_addr = diff --git a/tt_metal/impl/dispatch/kernel_config/eth_router.cpp b/tt_metal/impl/dispatch/kernel_config/eth_router.cpp index c21dfcad2465..28762190d2c8 100644 --- a/tt_metal/impl/dispatch/kernel_config/eth_router.cpp +++ b/tt_metal/impl/dispatch/kernel_config/eth_router.cpp @@ -60,19 +60,14 @@ void EthRouterKernel::GenerateStaticConfigs() { dependent_config_.input_packetize_dst_endpoint = {0x0}; static_config_.fwd_vc_count = this->static_config_.vc_count; - uint32_t created_semaphores = 0; for (int idx = 0; idx < downstream_kernels_.size(); idx++) { + static_config_.output_depacketize_local_sem[idx] = + tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType()); // Forwward VCs are the ones that don't connect to a prefetch if (auto pk = dynamic_cast(downstream_kernels_[idx])) { static_config_.fwd_vc_count = this->static_config_.fwd_vc_count.value() - 1; - static_config_.output_depacketize_local_sem[idx] = // TODO: to match for now, init one per vc after - tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType()); - created_semaphores++; } } - if (created_semaphores == 0) { // Just to match previous implementation - tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType()); - } for (int idx = 0; idx < static_config_.vc_count.value(); idx++) { static_config_.output_depacketize_log_page_size[idx] = dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE; diff --git a/tt_metal/impl/dispatch/kernel_config/fd_kernel.hpp b/tt_metal/impl/dispatch/kernel_config/fd_kernel.hpp index 38e6594e993f..def958f81179 100644 --- a/tt_metal/impl/dispatch/kernel_config/fd_kernel.hpp +++ b/tt_metal/impl/dispatch/kernel_config/fd_kernel.hpp @@ -8,8 +8,6 @@ #include "tt_metal/impl/dispatch/kernels/packet_queue_ctrl.hpp" #define UNUSED_LOGICAL_CORE tt_cxy_pair(device_->id(), 0, 0) -// TODO: Just to make match with previous implementation, remove later -#define UNUSED_LOGICAL_CORE_ADJUSTED tt_cxy_pair(servicing_device_id_, 0, 0) #define UNUSED_SEM_ID 0 typedef struct { diff --git a/tt_metal/impl/dispatch/kernel_config/mux.cpp b/tt_metal/impl/dispatch/kernel_config/mux.cpp index b60dd5d1a0d8..997b93832bd0 100644 --- a/tt_metal/impl/dispatch/kernel_config/mux.cpp +++ b/tt_metal/impl/dispatch/kernel_config/mux.cpp @@ -121,8 +121,7 @@ void MuxKernel::CreateKernel() { compile_args[4 + idx] |= (static_config_.remote_rx_network_type[idx].value() & 0xFF) << 24; } if (dependent_config_.input_packetize[idx]) { - // Zero out if input packetize not set to match previous implementation. TODO: don't have to do this - if (dependent_config_.input_packetize[idx].value() != 0) { + if (dependent_config_.input_packetize[idx]) { compile_args[19 + idx] |= (dependent_config_.input_packetize[idx].value() & 0xFF); compile_args[19 + idx] |= (dependent_config_.input_packetize_log_page_size[idx].value() & 0xFF) << 8; compile_args[19 + idx] |= (dependent_config_.input_packetize_upstream_sem[idx].value() & 0xFF) << 16; diff --git a/tt_metal/impl/dispatch/kernel_config/prefetch.cpp b/tt_metal/impl/dispatch/kernel_config/prefetch.cpp index 40abb9a019ab..11b66891e4f8 100644 --- a/tt_metal/impl/dispatch/kernel_config/prefetch.cpp +++ b/tt_metal/impl/dispatch/kernel_config/prefetch.cpp @@ -112,8 +112,6 @@ void PrefetchKernel::GenerateStaticConfigs() { tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType()); static_config_.my_downstream_cb_sem_id = tt::tt_metal::CreateSemaphore( *program_, logical_core_, static_config_.downstream_cb_pages.value(), GetCoreType()); - tt::tt_metal::CreateSemaphore( - *program_, logical_core_, 0, GetCoreType()); // TODO: what is this third semaphore for? static_config_.cmddat_q_log_page_size = dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE; static_config_.cmddat_q_blocks = dispatch_constants::PREFETCH_D_BUFFER_BLOCKS; @@ -128,6 +126,8 @@ void PrefetchKernel::GenerateStaticConfigs() { dependent_config_.downstream_cb_base = my_dispatch_constants.dispatch_buffer_base(); static_config_.downstream_cb_log_page_size = dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE; static_config_.downstream_cb_pages = my_dispatch_constants.dispatch_buffer_pages(); + static_config_.my_downstream_cb_sem_id = tt::tt_metal::CreateSemaphore( + *program_, logical_core_, my_dispatch_constants.dispatch_buffer_pages(), GetCoreType()); static_config_.pcie_base = 0; static_config_.pcie_size = 0; @@ -152,11 +152,6 @@ void PrefetchKernel::GenerateStaticConfigs() { static_config_.cmddat_q_pages = my_dispatch_constants.prefetch_d_buffer_pages(); static_config_.my_upstream_cb_sem_id = tt::tt_metal::CreateSemaphore(*program_, logical_core_, 0, GetCoreType()); - static_config_.my_downstream_cb_sem_id = tt::tt_metal::CreateSemaphore( - *program_, - logical_core_, - my_dispatch_constants.dispatch_buffer_pages(), - GetCoreType()); // TODO: this is out of order to match previous implementation static_config_.cmddat_q_log_page_size = dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE; static_config_.cmddat_q_blocks = dispatch_constants::PREFETCH_D_BUFFER_BLOCKS; @@ -231,7 +226,7 @@ void PrefetchKernel::GenerateDependentConfigs() { } else if (static_config_.is_h_variant.value()) { // Upstream, just host so no dispatch core TT_ASSERT(upstream_kernels_.size() == 0); - dependent_config_.upstream_logical_core = UNUSED_LOGICAL_CORE_ADJUSTED; + dependent_config_.upstream_logical_core = UNUSED_LOGICAL_CORE; dependent_config_.upstream_cb_sem_id = 0; // Used in prefetch_d only // Downstream, expect just one ROUTER @@ -239,7 +234,7 @@ void PrefetchKernel::GenerateDependentConfigs() { auto router_kernel = dynamic_cast(downstream_kernels_[0]); TT_ASSERT(router_kernel); dependent_config_.downstream_logical_core = router_kernel->GetLogicalCore(); - dependent_config_.downstream_s_logical_core = UNUSED_LOGICAL_CORE_ADJUSTED; + dependent_config_.downstream_s_logical_core = UNUSED_LOGICAL_CORE; uint32_t router_idx = router_kernel->GetUpstreamPort(this); // Need the port that this connects to downstream dependent_config_.downstream_cb_base = (router_kernel->GetStaticConfig().rx_queue_start_addr_words.value() << 4) +