Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove logical_id from pci_device #330

Merged
merged 1 commit into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions device/api/umd/device/pci_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ using tt::umd::semver_t;
class PCIDevice {
const std::string device_path; // Path to character device: /dev/tenstorrent/N
const int pci_device_num; // N in /dev/tenstorrent/N
const int logical_id; // Unique identifier for each device in entire network topology
const int pci_device_file_desc; // Character device file descriptor
const PciDeviceInfo info; // PCI device info
const int numa_node; // -1 if non-NUMA
Expand All @@ -93,9 +92,8 @@ class PCIDevice {
* sysfs, and maps device memory region(s) into the process address space.
*
* @param pci_device_number N in /dev/tenstorrent/N
* @param logical_device_id unique identifier for this device in the network topology
*/
PCIDevice(int pci_device_number, int logical_device_id = 0);
PCIDevice(int pci_device_number);

/**
* PCIDevice destructor.
Expand Down Expand Up @@ -129,13 +127,6 @@ class PCIDevice {
*/
int get_device_num() const { return pci_device_num; }

/**
* @return unique integer for each device in entire network topology
* TODO: target for removal; upper layers shouldn't have to pass this in here. It
* is unused by this class.
*/
int get_logical_id() const { return logical_id; }

/**
* @return PCI device id
*/
Expand Down Expand Up @@ -179,18 +170,18 @@ class PCIDevice {
tt_xy_pair end,
std::uint64_t address,
bool multicast,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering);
dynamic_tlb set_dynamic_tlb(
unsigned int tlb_index,
tt_xy_pair target,
std::uint64_t address,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering = tt::umd::tlb_data::Relaxed);
dynamic_tlb set_dynamic_tlb_broadcast(
unsigned int tlb_index,
std::uint64_t address,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
tt_xy_pair start,
tt_xy_pair end,
std::uint64_t ordering = tt::umd::tlb_data::Relaxed);
Expand Down
54 changes: 30 additions & 24 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,7 @@ void Cluster::create_device(
"Opening TT_PCI_INTERFACE_ID {} for netlist target_device_id: {}",
pci_interface_id,
logical_device_id);
m_pci_device_map.insert(
{logical_device_id, std::make_unique<PCIDevice>(pci_interface_id, logical_device_id)});
m_pci_device_map.insert({logical_device_id, std::make_unique<PCIDevice>(pci_interface_id)});
}
auto dev = m_pci_device_map.at(logical_device_id).get();

Expand Down Expand Up @@ -529,8 +528,7 @@ Cluster::Cluster(
m_num_pci_devices = available_device_ids.size();

int physical_device_id = available_device_ids[0];
// TODO: remove logical_device_id
PCIDevice pci_device(physical_device_id, 0);
PCIDevice pci_device(physical_device_id);
tt::ARCH device_arch = pci_device.get_arch();

std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch);
Expand Down Expand Up @@ -579,8 +577,7 @@ Cluster::Cluster(
m_num_pci_devices = available_device_ids.size();

int physical_device_id = available_device_ids[0];
// TODO: remove logical_device_id
PCIDevice pci_device(physical_device_id, 0);
PCIDevice pci_device(physical_device_id);
tt::ARCH device_arch = pci_device.get_arch();

std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch);
Expand Down Expand Up @@ -953,7 +950,6 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo
PCIDevice* device = get_pci_device(chip_id);

auto valid = soft_resets & ALL_TENSIX_SOFT_RESET;
auto logical_id = device->get_logical_id();

log_debug(
LogSiliconDriver,
Expand All @@ -966,11 +962,11 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo
auto [soft_reset_reg, _] = device->set_dynamic_tlb_broadcast(
architecture_implementation->get_reg_tlb(),
architecture_implementation->get_tensix_soft_reset_addr(),
harvested_coord_translation,
harvested_coord_translation.at(chip_id),
tt_xy_pair(0, 0),
tt_xy_pair(
architecture_implementation->get_grid_size_x() - 1,
architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(logical_id)),
architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id)),
TLB_DATA::Posted);
device->write_regs(soft_reset_reg, 1, &valid);
tt_driver_atomics::sfence();
Expand Down Expand Up @@ -1152,7 +1148,11 @@ void Cluster::write_device_memory(

while (size_in_bytes > 0) {
auto [mapped_address, tlb_size] = dev->set_dynamic_tlb(
tlb_index, target, address, harvested_coord_translation, dynamic_tlb_ordering_modes.at(fallback_tlb));
tlb_index,
target,
address,
harvested_coord_translation.at(target.chip),
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
dev->write_block(mapped_address, transfer_size, buffer_addr);

Expand Down Expand Up @@ -1203,7 +1203,11 @@ void Cluster::read_device_memory(
log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index);
while (size_in_bytes > 0) {
auto [mapped_address, tlb_size] = dev->set_dynamic_tlb(
tlb_index, target, address, harvested_coord_translation, dynamic_tlb_ordering_modes.at(fallback_tlb));
tlb_index,
target,
address,
harvested_coord_translation.at(target.chip),
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
dev->read_block(mapped_address, transfer_size, buffer_addr);

Expand Down Expand Up @@ -1379,7 +1383,7 @@ void Cluster::configure_tlb(
ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed,
"Invalid ordering specified in Cluster::configure_tlb");
PCIDevice* pci_device = get_pci_device(logical_device_id);
pci_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation, ordering);
pci_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering);
auto tlb_size = std::get<1>(pci_device->get_architecture_implementation()->describe_tlb(tlb_index).value());
if (tlb_config_map.find(logical_device_id) == tlb_config_map.end()) {
tlb_config_map.insert({logical_device_id, {}});
Expand Down Expand Up @@ -1435,30 +1439,27 @@ void Cluster::init_pcie_iatus() {
}

int Cluster::test_setup_interface() {
int ret_val = 0;
int logical_device_id = m_pci_device_map.begin()->first;
PCIDevice* dev = m_pci_device_map.at(logical_device_id).get();
if (arch_name == tt::ARCH::GRAYSKULL) {
int ret_val = 0;
PCIDevice* dev = m_pci_device_map.begin()->second.get();

uint32_t mapped_reg = dev->set_dynamic_tlb(
dev->get_architecture_implementation()->get_reg_tlb(),
tt_xy_pair(0, 0),
0xffb20108,
harvested_coord_translation)
harvested_coord_translation.at(logical_device_id))
.bar_offset;

uint32_t regval = 0;
dev->read_regs(mapped_reg, 1, &regval);
ret_val = (regval != 0xffffffff && ((regval & 0x1) == 1)) ? 0 : 1;
return ret_val;
} else if (arch_name == tt::ARCH::WORMHOLE_B0) {
int ret_val = 0;
PCIDevice* dev = m_pci_device_map.begin()->second.get();

uint32_t mapped_reg = dev->set_dynamic_tlb(
dev->get_architecture_implementation()->get_reg_tlb(),
tt_xy_pair(1, 0),
0xffb20108,
harvested_coord_translation)
harvested_coord_translation.at(logical_device_id))
.bar_offset;

uint32_t regval = 0;
Expand All @@ -1472,7 +1473,7 @@ int Cluster::test_setup_interface() {

// uint32_t mapped_reg = dev->set_dynamic_tlb(m_pci_device_map.begin()->second,
// dev->get_architecture_implementation()->get_reg_tlb(), tt_xy_pair(1, 0), 0xffb20108,
// harvested_coord_translation).bar_offset;
// harvested_coord_translation.at(logical_device_id)).bar_offset;

// uint32_t regval = 0;
// read_regs(dev, mapped_reg, 1, &regval);
Expand Down Expand Up @@ -2562,7 +2563,12 @@ void Cluster::pcie_broadcast_write(
const scoped_lock<named_mutex> lock(*get_mutex(fallback_tlb, pci_device->get_device_num()));
while (size_in_bytes > 0) {
auto [mapped_address, tlb_size] = pci_device->set_dynamic_tlb_broadcast(
tlb_index, addr, harvested_coord_translation, start, end, dynamic_tlb_ordering_modes.at(fallback_tlb));
tlb_index,
addr,
harvested_coord_translation.at(chip),
start,
end,
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint64_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
pci_device->write_block(mapped_address, transfer_size, buffer_addr);

Expand Down Expand Up @@ -3043,7 +3049,7 @@ void Cluster::read_mmio_device_register(
log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index);

auto [mapped_address, tlb_size] =
pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation, TLB_DATA::Strict);
pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict);
// Align block to 4bytes if needed.
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size);
pci_device->read_regs(mapped_address, aligned_buf.block_size / sizeof(std::uint32_t), aligned_buf.local_storage);
Expand All @@ -3063,7 +3069,7 @@ void Cluster::write_mmio_device_register(
log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index);

auto [mapped_address, tlb_size] =
pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation, TLB_DATA::Strict);
pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict);
// Align block to 4bytes if needed.
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size);
if (aligned_buf.input_size != aligned_buf.block_size) {
Expand Down
14 changes: 6 additions & 8 deletions device/pcie/pci_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,9 @@ tt::ARCH PciDeviceInfo::get_arch() const {
return infos;
}

PCIDevice::PCIDevice(int pci_device_number, int logical_device_id) :
PCIDevice::PCIDevice(int pci_device_number) :
device_path(fmt::format("/dev/tenstorrent/{}", pci_device_number)),
pci_device_num(pci_device_number),
logical_id(logical_device_id),
pci_device_file_desc(open(device_path.c_str(), O_RDWR | O_CLOEXEC)),
info(read_device_info(pci_device_file_desc)),
numa_node(read_sysfs<int>(info, "numa_node", -1)), // default to -1 if not found
Expand Down Expand Up @@ -602,7 +601,7 @@ dynamic_tlb PCIDevice::set_dynamic_tlb(
tt_xy_pair end,
std::uint64_t address,
bool multicast,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering) {
auto architecture_implementation = get_architecture_implementation();
if (multicast) {
Expand All @@ -624,8 +623,8 @@ dynamic_tlb PCIDevice::set_dynamic_tlb(

tt::umd::tlb_configuration tlb_config = architecture_implementation->get_tlb_configuration(tlb_index);
std::uint32_t TLB_CFG_REG_SIZE_BYTES = architecture_implementation->get_tlb_cfg_reg_size_bytes();
auto translated_start_coords = harvested_coord_translation.at(logical_id).at(start);
auto translated_end_coords = harvested_coord_translation.at(logical_id).at(end);
auto translated_start_coords = harvested_coord_translation.at(start);
auto translated_end_coords = harvested_coord_translation.at(end);
uint32_t tlb_address = address / tlb_config.size;
uint32_t local_address = address % tlb_config.size;
uint64_t tlb_base = tlb_config.base + (tlb_config.size * tlb_config.index_offset);
Expand Down Expand Up @@ -665,15 +664,15 @@ dynamic_tlb PCIDevice::set_dynamic_tlb(
unsigned int tlb_index,
tt_xy_pair target,
std::uint64_t address,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering) {
return set_dynamic_tlb(tlb_index, tt_xy_pair(0, 0), target, address, false, harvested_coord_translation, ordering);
}

dynamic_tlb PCIDevice::set_dynamic_tlb_broadcast(
unsigned int tlb_index,
std::uint64_t address,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
tt_xy_pair start,
tt_xy_pair end,
std::uint64_t ordering) {
Expand All @@ -688,7 +687,6 @@ tt::umd::architecture_implementation *PCIDevice::get_architecture_implementation
bool PCIDevice::init_hugepage(uint32_t num_host_mem_channels) {
const size_t hugepage_size = HUGEPAGE_REGION_SIZE;

// Convert from logical (device_id in netlist) to physical device_id (in case of virtualization)
auto physical_device_id = get_device_num();

std::string hugepage_dir = find_hugepage_dir(hugepage_size);
Expand Down