From bc9d4d5a241d055ac55a52c7792c2f8e37e68fc8 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Mon, 25 Nov 2024 13:07:44 +0000 Subject: [PATCH] init --- device/api/umd/device/pci_device.hpp | 17 +++------ device/cluster.cpp | 54 +++++++++++++++------------- device/pcie/pci_device.cpp | 14 ++++---- 3 files changed, 40 insertions(+), 45 deletions(-) diff --git a/device/api/umd/device/pci_device.hpp b/device/api/umd/device/pci_device.hpp index 914663d8..dbd8a9bd 100644 --- a/device/api/umd/device/pci_device.hpp +++ b/device/api/umd/device/pci_device.hpp @@ -60,7 +60,6 @@ struct PciDeviceInfo { class PCIDevice { const std::string device_path; // Path to character device: /dev/tenstorrent/N const int pci_device_num; // N in /dev/tenstorrent/N - const int logical_id; // Unique identifier for each device in entire network topology const int pci_device_file_desc; // Character device file descriptor const PciDeviceInfo info; // PCI device info const int numa_node; // -1 if non-NUMA @@ -86,9 +85,8 @@ class PCIDevice { * sysfs, and maps device memory region(s) into the process address space. * * @param pci_device_number N in /dev/tenstorrent/N - * @param logical_device_id unique identifier for this device in the network topology */ - PCIDevice(int pci_device_number, int logical_device_id = 0); + PCIDevice(int pci_device_number); /** * PCIDevice destructor. @@ -122,13 +120,6 @@ class PCIDevice { */ int get_device_num() const { return pci_device_num; } - /** - * @return unique integer for each device in entire network topology - * TODO: target for removal; upper layers shouldn't to pass this in here. It - * is unused by this class. 
- */ - int get_logical_id() const { return logical_id; } - /** * @return PCI device id */ @@ -172,18 +163,18 @@ class PCIDevice { tt_xy_pair end, std::uint64_t address, bool multicast, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, std::uint64_t ordering); dynamic_tlb set_dynamic_tlb( unsigned int tlb_index, tt_xy_pair target, std::uint64_t address, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, std::uint64_t ordering = tt::umd::tlb_data::Relaxed); dynamic_tlb set_dynamic_tlb_broadcast( unsigned int tlb_index, std::uint64_t address, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, tt_xy_pair start, tt_xy_pair end, std::uint64_t ordering = tt::umd::tlb_data::Relaxed); diff --git a/device/cluster.cpp b/device/cluster.cpp index 7163223d..e6db3c6a 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -263,8 +263,7 @@ void Cluster::create_device( "Opening TT_PCI_INTERFACE_ID {} for netlist target_device_id: {}", pci_interface_id, logical_device_id); - m_pci_device_map.insert( - {logical_device_id, std::make_unique(pci_interface_id, logical_device_id)}); + m_pci_device_map.insert({logical_device_id, std::make_unique(pci_interface_id)}); } auto dev = m_pci_device_map.at(logical_device_id).get(); @@ -526,8 +525,7 @@ Cluster::Cluster( m_num_pci_devices = available_device_ids.size(); int physical_device_id = available_device_ids[0]; - // TODO: remove logical_device_id - PCIDevice pci_device(physical_device_id, 0); + PCIDevice pci_device(physical_device_id); tt::ARCH device_arch = pci_device.get_arch(); std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch); @@ -576,8 +574,7 @@ Cluster::Cluster( m_num_pci_devices = available_device_ids.size(); int physical_device_id = available_device_ids[0]; - // TODO: remove logical_device_id - PCIDevice pci_device(physical_device_id, 0); + 
PCIDevice pci_device(physical_device_id); tt::ARCH device_arch = pci_device.get_arch(); std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch); @@ -950,7 +947,6 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo PCIDevice* device = get_pci_device(chip_id); auto valid = soft_resets & ALL_TENSIX_SOFT_RESET; - auto logical_id = device->get_logical_id(); log_debug( LogSiliconDriver, @@ -963,11 +959,11 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo auto [soft_reset_reg, _] = device->set_dynamic_tlb_broadcast( architecture_implementation->get_reg_tlb(), architecture_implementation->get_tensix_soft_reset_addr(), - harvested_coord_translation, + harvested_coord_translation.at(chip_id), tt_xy_pair(0, 0), tt_xy_pair( architecture_implementation->get_grid_size_x() - 1, - architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(logical_id)), + architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id)), TLB_DATA::Posted); device->write_regs(soft_reset_reg, 1, &valid); tt_driver_atomics::sfence(); @@ -1149,7 +1145,11 @@ void Cluster::write_device_memory( while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = dev->set_dynamic_tlb( - tlb_index, target, address, harvested_coord_translation, dynamic_tlb_ordering_modes.at(fallback_tlb)); + tlb_index, + target, + address, + harvested_coord_translation.at(target.chip), + dynamic_tlb_ordering_modes.at(fallback_tlb)); uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); dev->write_block(mapped_address, transfer_size, buffer_addr); @@ -1206,7 +1206,11 @@ void Cluster::read_device_memory( log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = dev->set_dynamic_tlb( - tlb_index, target, address, harvested_coord_translation, dynamic_tlb_ordering_modes.at(fallback_tlb)); + tlb_index, + target, + address, + 
harvested_coord_translation.at(target.chip), + dynamic_tlb_ordering_modes.at(fallback_tlb)); uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); dev->read_block(mapped_address, transfer_size, buffer_addr); @@ -1382,7 +1386,7 @@ void Cluster::configure_tlb( ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed, "Invalid ordering specified in Cluster::configure_tlb"); PCIDevice* pci_device = get_pci_device(logical_device_id); - pci_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation, ordering); + pci_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering); auto tlb_size = std::get<1>(pci_device->get_architecture_implementation()->describe_tlb(tlb_index).value()); if (tlb_config_map.find(logical_device_id) == tlb_config_map.end()) { tlb_config_map.insert({logical_device_id, {}}); @@ -1438,15 +1442,15 @@ void Cluster::init_pcie_iatus() { } int Cluster::test_setup_interface() { + int ret_val = 0; + int logical_device_id = m_pci_device_map.begin()->first; + PCIDevice* dev = m_pci_device_map.at(logical_device_id).get(); if (arch_name == tt::ARCH::GRAYSKULL) { - int ret_val = 0; - PCIDevice* dev = m_pci_device_map.begin()->second.get(); - uint32_t mapped_reg = dev->set_dynamic_tlb( dev->get_architecture_implementation()->get_reg_tlb(), tt_xy_pair(0, 0), 0xffb20108, - harvested_coord_translation) + harvested_coord_translation.at(logical_device_id)) .bar_offset; uint32_t regval = 0; @@ -1454,14 +1458,11 @@ int Cluster::test_setup_interface() { ret_val = (regval != 0xffffffff && ((regval & 0x1) == 1)) ? 
0 : 1; return ret_val; } else if (arch_name == tt::ARCH::WORMHOLE_B0) { - int ret_val = 0; - PCIDevice* dev = m_pci_device_map.begin()->second.get(); - uint32_t mapped_reg = dev->set_dynamic_tlb( dev->get_architecture_implementation()->get_reg_tlb(), tt_xy_pair(1, 0), 0xffb20108, - harvested_coord_translation) + harvested_coord_translation.at(logical_device_id)) .bar_offset; uint32_t regval = 0; @@ -1475,7 +1476,7 @@ int Cluster::test_setup_interface() { // uint32_t mapped_reg = dev->set_dynamic_tlb(m_pci_device_map.begin()->second, // dev->get_architecture_implementation()->get_reg_tlb(), tt_xy_pair(1, 0), 0xffb20108, - // harvested_coord_translation).bar_offset; + // harvested_coord_translation.at(logical_device_id)).bar_offset; // uint32_t regval = 0; // read_regs(dev, mapped_reg, 1, &regval); @@ -2565,7 +2566,12 @@ void Cluster::pcie_broadcast_write( const scoped_lock lock(*get_mutex(fallback_tlb, pci_device->get_device_num())); while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = pci_device->set_dynamic_tlb_broadcast( - tlb_index, addr, harvested_coord_translation, start, end, dynamic_tlb_ordering_modes.at(fallback_tlb)); + tlb_index, + addr, + harvested_coord_translation.at(chip), + start, + end, + dynamic_tlb_ordering_modes.at(fallback_tlb)); uint64_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); pci_device->write_block(mapped_address, transfer_size, buffer_addr); @@ -3046,7 +3052,7 @@ void Cluster::read_mmio_device_register( log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); auto [mapped_address, tlb_size] = - pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation, TLB_DATA::Strict); + pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict); // Align block to 4bytes if needed.
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size); pci_device->read_regs(mapped_address, aligned_buf.block_size / sizeof(std::uint32_t), aligned_buf.local_storage); @@ -3066,7 +3072,7 @@ void Cluster::write_mmio_device_register( log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); auto [mapped_address, tlb_size] = - pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation, TLB_DATA::Strict); + pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict); // Align block to 4bytes if needed. auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size); if (aligned_buf.input_size != aligned_buf.block_size) { diff --git a/device/pcie/pci_device.cpp b/device/pcie/pci_device.cpp index bdf40962..83dccbb9 100644 --- a/device/pcie/pci_device.cpp +++ b/device/pcie/pci_device.cpp @@ -249,10 +249,9 @@ tt::ARCH PciDeviceInfo::get_arch() const { return infos; } -PCIDevice::PCIDevice(int pci_device_number, int logical_device_id) : +PCIDevice::PCIDevice(int pci_device_number) : device_path(fmt::format("/dev/tenstorrent/{}", pci_device_number)), pci_device_num(pci_device_number), - logical_id(logical_device_id), pci_device_file_desc(open(device_path.c_str(), O_RDWR | O_CLOEXEC)), info(read_device_info(pci_device_file_desc)), numa_node(read_sysfs(info, "numa_node")), @@ -590,7 +589,7 @@ dynamic_tlb PCIDevice::set_dynamic_tlb( tt_xy_pair end, std::uint64_t address, bool multicast, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, std::uint64_t ordering) { auto architecture_implementation = get_architecture_implementation(); if (multicast) { @@ -612,8 +611,8 @@ dynamic_tlb PCIDevice::set_dynamic_tlb( tt::umd::tlb_configuration tlb_config = architecture_implementation->get_tlb_configuration(tlb_index); std::uint32_t TLB_CFG_REG_SIZE_BYTES = architecture_implementation->get_tlb_cfg_reg_size_bytes(); - auto translated_start_coords = 
harvested_coord_translation.at(logical_id).at(start); - auto translated_end_coords = harvested_coord_translation.at(logical_id).at(end); + auto translated_start_coords = harvested_coord_translation.at(start); + auto translated_end_coords = harvested_coord_translation.at(end); uint32_t tlb_address = address / tlb_config.size; uint32_t local_address = address % tlb_config.size; uint64_t tlb_base = tlb_config.base + (tlb_config.size * tlb_config.index_offset); @@ -653,7 +652,7 @@ dynamic_tlb PCIDevice::set_dynamic_tlb( unsigned int tlb_index, tt_xy_pair target, std::uint64_t address, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, std::uint64_t ordering) { return set_dynamic_tlb(tlb_index, tt_xy_pair(0, 0), target, address, false, harvested_coord_translation, ordering); } @@ -661,7 +660,7 @@ dynamic_tlb PCIDevice::set_dynamic_tlb( dynamic_tlb PCIDevice::set_dynamic_tlb_broadcast( unsigned int tlb_index, std::uint64_t address, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, tt_xy_pair start, tt_xy_pair end, std::uint64_t ordering) { @@ -676,7 +675,6 @@ tt::umd::architecture_implementation *PCIDevice::get_architecture_implementation bool PCIDevice::init_hugepage(uint32_t num_host_mem_channels) { const size_t hugepage_size = HUGEPAGE_REGION_SIZE; - // Convert from logical (device_id in netlist) to physical device_id (in case of virtualization) auto physical_device_id = get_device_num(); std::string hugepage_dir = find_hugepage_dir(hugepage_size);