Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
broskoTT committed Nov 25, 2024
1 parent 6bdc44f commit bc9d4d5
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 45 deletions.
17 changes: 4 additions & 13 deletions device/api/umd/device/pci_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ struct PciDeviceInfo {
class PCIDevice {
const std::string device_path; // Path to character device: /dev/tenstorrent/N
const int pci_device_num; // N in /dev/tenstorrent/N
const int logical_id; // Unique identifier for each device in entire network topology
const int pci_device_file_desc; // Character device file descriptor
const PciDeviceInfo info; // PCI device info
const int numa_node; // -1 if non-NUMA
Expand All @@ -86,9 +85,8 @@ class PCIDevice {
* sysfs, and maps device memory region(s) into the process address space.
*
* @param pci_device_number N in /dev/tenstorrent/N
* @param logical_device_id unique identifier for this device in the network topology
*/
PCIDevice(int pci_device_number, int logical_device_id = 0);
PCIDevice(int pci_device_number);

/**
* PCIDevice destructor.
Expand Down Expand Up @@ -122,13 +120,6 @@ class PCIDevice {
*/
int get_device_num() const { return pci_device_num; }

/**
* @return unique integer for each device in entire network topology
* TODO: target for removal; upper layers shouldn't need to pass this in here. It
* is unused by this class.
*/
int get_logical_id() const { return logical_id; }

/**
* @return PCI device id
*/
Expand Down Expand Up @@ -172,18 +163,18 @@ class PCIDevice {
tt_xy_pair end,
std::uint64_t address,
bool multicast,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering);
dynamic_tlb set_dynamic_tlb(
unsigned int tlb_index,
tt_xy_pair target,
std::uint64_t address,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering = tt::umd::tlb_data::Relaxed);
dynamic_tlb set_dynamic_tlb_broadcast(
unsigned int tlb_index,
std::uint64_t address,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
tt_xy_pair start,
tt_xy_pair end,
std::uint64_t ordering = tt::umd::tlb_data::Relaxed);
Expand Down
54 changes: 30 additions & 24 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,7 @@ void Cluster::create_device(
"Opening TT_PCI_INTERFACE_ID {} for netlist target_device_id: {}",
pci_interface_id,
logical_device_id);
m_pci_device_map.insert(
{logical_device_id, std::make_unique<PCIDevice>(pci_interface_id, logical_device_id)});
m_pci_device_map.insert({logical_device_id, std::make_unique<PCIDevice>(pci_interface_id)});
}
auto dev = m_pci_device_map.at(logical_device_id).get();

Expand Down Expand Up @@ -526,8 +525,7 @@ Cluster::Cluster(
m_num_pci_devices = available_device_ids.size();

int physical_device_id = available_device_ids[0];
// TODO: remove logical_device_id
PCIDevice pci_device(physical_device_id, 0);
PCIDevice pci_device(physical_device_id);
tt::ARCH device_arch = pci_device.get_arch();

std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch);
Expand Down Expand Up @@ -576,8 +574,7 @@ Cluster::Cluster(
m_num_pci_devices = available_device_ids.size();

int physical_device_id = available_device_ids[0];
// TODO: remove logical_device_id
PCIDevice pci_device(physical_device_id, 0);
PCIDevice pci_device(physical_device_id);
tt::ARCH device_arch = pci_device.get_arch();

std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch);
Expand Down Expand Up @@ -950,7 +947,6 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo
PCIDevice* device = get_pci_device(chip_id);

auto valid = soft_resets & ALL_TENSIX_SOFT_RESET;
auto logical_id = device->get_logical_id();

log_debug(
LogSiliconDriver,
Expand All @@ -963,11 +959,11 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo
auto [soft_reset_reg, _] = device->set_dynamic_tlb_broadcast(
architecture_implementation->get_reg_tlb(),
architecture_implementation->get_tensix_soft_reset_addr(),
harvested_coord_translation,
harvested_coord_translation.at(chip_id),
tt_xy_pair(0, 0),
tt_xy_pair(
architecture_implementation->get_grid_size_x() - 1,
architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(logical_id)),
architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id)),
TLB_DATA::Posted);
device->write_regs(soft_reset_reg, 1, &valid);
tt_driver_atomics::sfence();
Expand Down Expand Up @@ -1149,7 +1145,11 @@ void Cluster::write_device_memory(

while (size_in_bytes > 0) {
auto [mapped_address, tlb_size] = dev->set_dynamic_tlb(
tlb_index, target, address, harvested_coord_translation, dynamic_tlb_ordering_modes.at(fallback_tlb));
tlb_index,
target,
address,
harvested_coord_translation.at(target.chip),
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
dev->write_block(mapped_address, transfer_size, buffer_addr);

Expand Down Expand Up @@ -1206,7 +1206,11 @@ void Cluster::read_device_memory(
log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index);
while (size_in_bytes > 0) {
auto [mapped_address, tlb_size] = dev->set_dynamic_tlb(
tlb_index, target, address, harvested_coord_translation, dynamic_tlb_ordering_modes.at(fallback_tlb));
tlb_index,
target,
address,
harvested_coord_translation.at(target.chip),
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
dev->read_block(mapped_address, transfer_size, buffer_addr);

Expand Down Expand Up @@ -1382,7 +1386,7 @@ void Cluster::configure_tlb(
ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed,
"Invalid ordering specified in Cluster::configure_tlb");
PCIDevice* pci_device = get_pci_device(logical_device_id);
pci_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation, ordering);
pci_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering);
auto tlb_size = std::get<1>(pci_device->get_architecture_implementation()->describe_tlb(tlb_index).value());
if (tlb_config_map.find(logical_device_id) == tlb_config_map.end()) {
tlb_config_map.insert({logical_device_id, {}});
Expand Down Expand Up @@ -1438,30 +1442,27 @@ void Cluster::init_pcie_iatus() {
}

int Cluster::test_setup_interface() {
int ret_val = 0;
int logical_device_id = m_pci_device_map.begin()->first;
PCIDevice* dev = m_pci_device_map.at(logical_device_id).get();
if (arch_name == tt::ARCH::GRAYSKULL) {
int ret_val = 0;
PCIDevice* dev = m_pci_device_map.begin()->second.get();

uint32_t mapped_reg = dev->set_dynamic_tlb(
dev->get_architecture_implementation()->get_reg_tlb(),
tt_xy_pair(0, 0),
0xffb20108,
harvested_coord_translation)
harvested_coord_translation.at(logical_device_id))
.bar_offset;

uint32_t regval = 0;
dev->read_regs(mapped_reg, 1, &regval);
ret_val = (regval != 0xffffffff && ((regval & 0x1) == 1)) ? 0 : 1;
return ret_val;
} else if (arch_name == tt::ARCH::WORMHOLE_B0) {
int ret_val = 0;
PCIDevice* dev = m_pci_device_map.begin()->second.get();

uint32_t mapped_reg = dev->set_dynamic_tlb(
dev->get_architecture_implementation()->get_reg_tlb(),
tt_xy_pair(1, 0),
0xffb20108,
harvested_coord_translation)
harvested_coord_translation.at(logical_device_id))
.bar_offset;

uint32_t regval = 0;
Expand All @@ -1475,7 +1476,7 @@ int Cluster::test_setup_interface() {

// uint32_t mapped_reg = dev->set_dynamic_tlb(m_pci_device_map.begin()->second,
// dev->get_architecture_implementation()->get_reg_tlb(), tt_xy_pair(1, 0), 0xffb20108,
// harvested_coord_translation).bar_offset;
// harvested_coord_translation.at(logical_device_id)).bar_offset;

// uint32_t regval = 0;
// read_regs(dev, mapped_reg, 1, &regval);
Expand Down Expand Up @@ -2565,7 +2566,12 @@ void Cluster::pcie_broadcast_write(
const scoped_lock<named_mutex> lock(*get_mutex(fallback_tlb, pci_device->get_device_num()));
while (size_in_bytes > 0) {
auto [mapped_address, tlb_size] = pci_device->set_dynamic_tlb_broadcast(
tlb_index, addr, harvested_coord_translation, start, end, dynamic_tlb_ordering_modes.at(fallback_tlb));
tlb_index,
addr,
harvested_coord_translation.at(chip),
start,
end,
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint64_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
pci_device->write_block(mapped_address, transfer_size, buffer_addr);

Expand Down Expand Up @@ -3046,7 +3052,7 @@ void Cluster::read_mmio_device_register(
log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index);

auto [mapped_address, tlb_size] =
pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation, TLB_DATA::Strict);
pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict);
// Align block to 4bytes if needed.
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size);
pci_device->read_regs(mapped_address, aligned_buf.block_size / sizeof(std::uint32_t), aligned_buf.local_storage);
Expand All @@ -3066,7 +3072,7 @@ void Cluster::write_mmio_device_register(
log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index);

auto [mapped_address, tlb_size] =
pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation, TLB_DATA::Strict);
pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict);
// Align block to 4bytes if needed.
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size);
if (aligned_buf.input_size != aligned_buf.block_size) {
Expand Down
14 changes: 6 additions & 8 deletions device/pcie/pci_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,10 +249,9 @@ tt::ARCH PciDeviceInfo::get_arch() const {
return infos;
}

PCIDevice::PCIDevice(int pci_device_number, int logical_device_id) :
PCIDevice::PCIDevice(int pci_device_number) :
device_path(fmt::format("/dev/tenstorrent/{}", pci_device_number)),
pci_device_num(pci_device_number),
logical_id(logical_device_id),
pci_device_file_desc(open(device_path.c_str(), O_RDWR | O_CLOEXEC)),
info(read_device_info(pci_device_file_desc)),
numa_node(read_sysfs<int>(info, "numa_node")),
Expand Down Expand Up @@ -590,7 +589,7 @@ dynamic_tlb PCIDevice::set_dynamic_tlb(
tt_xy_pair end,
std::uint64_t address,
bool multicast,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering) {
auto architecture_implementation = get_architecture_implementation();
if (multicast) {
Expand All @@ -612,8 +611,8 @@ dynamic_tlb PCIDevice::set_dynamic_tlb(

tt::umd::tlb_configuration tlb_config = architecture_implementation->get_tlb_configuration(tlb_index);
std::uint32_t TLB_CFG_REG_SIZE_BYTES = architecture_implementation->get_tlb_cfg_reg_size_bytes();
auto translated_start_coords = harvested_coord_translation.at(logical_id).at(start);
auto translated_end_coords = harvested_coord_translation.at(logical_id).at(end);
auto translated_start_coords = harvested_coord_translation.at(start);
auto translated_end_coords = harvested_coord_translation.at(end);
uint32_t tlb_address = address / tlb_config.size;
uint32_t local_address = address % tlb_config.size;
uint64_t tlb_base = tlb_config.base + (tlb_config.size * tlb_config.index_offset);
Expand Down Expand Up @@ -653,15 +652,15 @@ dynamic_tlb PCIDevice::set_dynamic_tlb(
unsigned int tlb_index,
tt_xy_pair target,
std::uint64_t address,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering) {
return set_dynamic_tlb(tlb_index, tt_xy_pair(0, 0), target, address, false, harvested_coord_translation, ordering);
}

dynamic_tlb PCIDevice::set_dynamic_tlb_broadcast(
unsigned int tlb_index,
std::uint64_t address,
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, tt_xy_pair>> &harvested_coord_translation,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
tt_xy_pair start,
tt_xy_pair end,
std::uint64_t ordering) {
Expand All @@ -676,7 +675,6 @@ tt::umd::architecture_implementation *PCIDevice::get_architecture_implementation
bool PCIDevice::init_hugepage(uint32_t num_host_mem_channels) {
const size_t hugepage_size = HUGEPAGE_REGION_SIZE;

// Convert from logical (device_id in netlist) to physical device_id (in case of virtualization)
auto physical_device_id = get_device_num();

std::string hugepage_dir = find_hugepage_dir(hugepage_size);
Expand Down

0 comments on commit bc9d4d5

Please sign in to comment.