Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove describe_tlb function #419

Merged
merged 5 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion device/api/umd/device/architecture_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class architecture_implementation {

virtual std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const = 0;
virtual tlb_configuration get_tlb_configuration(uint32_t tlb_index) const = 0;
virtual std::optional<std::tuple<std::uint64_t, std::uint64_t>> describe_tlb(std::int32_t tlb_index) const = 0;
virtual std::pair<std::uint64_t, std::uint64_t> get_tlb_data(
std::uint32_t tlb_index, const tlb_data& data) const = 0;

Expand Down
1 change: 0 additions & 1 deletion device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,6 @@ class blackhole_implementation : public architecture_implementation {

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::optional<std::tuple<std::uint64_t, std::uint64_t>> describe_tlb(std::int32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;

tt_device_l1_address_params get_l1_address_params() const override;
Expand Down
5 changes: 5 additions & 0 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,10 @@ class Cluster : public tt_device {
* If the tlbs are initialized, returns a tuple with the TLB base address and its size
*/
std::optional<std::tuple<uint32_t, uint32_t>> get_tlb_data_from_target(const tt_cxy_pair& target);
/**
* Returns a struct with the TLB configuration, or throws an exception if the target does not have a static TLB.
*/
tlb_configuration get_tlb_configuration(const tt_cxy_pair& target);
/**
* Provide fast write access to a statically-mapped TLB.
* It is the caller's responsibility to ensure that
Expand Down Expand Up @@ -707,6 +711,7 @@ class Cluster : public tt_device {
const std::string& fallback_tlb);
std::optional<std::tuple<uint32_t, uint32_t>> get_tlb_data_from_target(
const chip_id_t chip, const tt::umd::CoreCoord core);
tlb_configuration get_tlb_configuration(const chip_id_t chip, const tt::umd::CoreCoord core);
tt::Writer get_static_tlb_writer(const chip_id_t chip, const tt::umd::CoreCoord target);
virtual void configure_active_ethernet_cores_for_mmio_device(
const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip);
Expand Down
1 change: 0 additions & 1 deletion device/api/umd/device/grayskull_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,6 @@ class grayskull_implementation : public architecture_implementation {

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::optional<std::tuple<std::uint64_t, std::uint64_t>> describe_tlb(std::int32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;

tt_device_l1_address_params get_l1_address_params() const override;
Expand Down
7 changes: 4 additions & 3 deletions device/api/umd/device/types/tlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ struct tlb_data {

// Describes a single TLB window as produced by the per-architecture
// get_tlb_configuration(tlb_index) implementations.
struct tlb_configuration {
// Size of the window; callers use it as the modulus when folding a device
// address into the window (address % size).
uint64_t size;
// NOTE(review): the next three 32-bit declarations look like the pre-change
// side of this diff hunk, superseded by the 64-bit declarations that follow —
// confirm against the real header before relying on them.
uint32_t base;
uint32_t cfg_addr;
uint32_t index_offset;
// Base of the TLB group the window belongs to (filled from the
// DYNAMIC_TLB_*_BASE constants).
uint64_t base;
// Configuration address of the group (filled from the
// DYNAMIC_TLB_*_CFG_ADDR constants).
uint64_t cfg_addr;
// Position of the TLB inside its group: tlb_index minus the group's
// TLB_BASE_INDEX_* constant.
uint64_t index_offset;
// base + index_offset * per-TLB size of the group; callers add this to the
// BAR0 mapping pointer or pass it to read_block/write_block.
uint64_t tlb_offset;
// Group-specific tlb_offsets value (filled from the TLB_*_OFFSET constants).
tlb_offsets offset;
};

Expand Down
1 change: 0 additions & 1 deletion device/api/umd/device/wormhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ class wormhole_implementation : public architecture_implementation {

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::optional<std::tuple<std::uint64_t, std::uint64_t>> describe_tlb(std::int32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;

tt_device_l1_address_params get_l1_address_params() const override;
Expand Down
28 changes: 4 additions & 24 deletions device/blackhole/blackhole_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i
.base = blackhole::DYNAMIC_TLB_4G_BASE,
.cfg_addr = blackhole::DYNAMIC_TLB_4G_CFG_ADDR,
.index_offset = tlb_index - blackhole::TLB_BASE_INDEX_4G,
.tlb_offset = blackhole::DYNAMIC_TLB_4G_BASE +
(tlb_index - blackhole::TLB_BASE_INDEX_4G) * blackhole::DYNAMIC_TLB_4G_SIZE,
.offset = blackhole::TLB_4G_OFFSET,
};
}
Expand All @@ -43,34 +45,12 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i
.base = blackhole::DYNAMIC_TLB_2M_BASE,
.cfg_addr = blackhole::DYNAMIC_TLB_2M_CFG_ADDR,
.index_offset = tlb_index - blackhole::TLB_BASE_INDEX_2M,
.tlb_offset = blackhole::DYNAMIC_TLB_2M_BASE +
(tlb_index - blackhole::TLB_BASE_INDEX_2M) * blackhole::DYNAMIC_TLB_2M_SIZE,
.offset = blackhole::TLB_2M_OFFSET,
};
}

/**
 * Maps a TLB index to the (offset, size) pair of its window.
 *
 * Indices [0, 202) are 2 MiB windows laid out back to back from offset 0.
 * Indices [202, 202 + blackhole::TLB_COUNT_4G) are windows of
 * blackhole::TLB_4G_SIZE bytes that start at blackhole::TLB_BASE_4G.
 *
 * @param tlb_index TLB index to describe; may be negative.
 * @return (offset, size) for a recognised index, std::nullopt for a negative
 *         or out-of-range index.
 */
std::optional<std::tuple<std::uint64_t, std::uint64_t>> blackhole_implementation::describe_tlb(
    std::int32_t tlb_index) const {
    // Reject negatives first so the index can be handled as unsigned below.
    if (tlb_index < 0) {
        return std::nullopt;
    }

    const std::uint32_t count_2m = 202;
    const std::uint32_t window_2m = 1u << 21;
    const auto index = static_cast<std::uint32_t>(tlb_index);

    // 2 MiB group: windows are contiguous starting at offset 0.
    if (index < count_2m) {
        return std::tuple(index * window_2m, window_2m);
    }

    // 4G group: slots are counted from the end of the 2 MiB group.
    if (index < count_2m + blackhole::TLB_COUNT_4G) {
        const auto slot = index - count_2m;
        const auto window_4g = blackhole::TLB_4G_SIZE;
        return std::tuple(blackhole::TLB_BASE_4G + slot * window_4g, window_4g);
    }

    return std::nullopt;
}

std::pair<std::uint64_t, std::uint64_t> blackhole_implementation::get_tlb_data(
std::uint32_t tlb_index, const tlb_data& data) const {
if (tlb_index < blackhole::TLB_COUNT_2M) {
Expand Down
59 changes: 36 additions & 23 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,9 @@ bool Cluster::is_tlb_mapped(tt_cxy_pair target, uint64_t address, uint32_t size_
auto* dev = get_tt_device(target.chip);

int32_t tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y));
auto tlb_description = dev->get_architecture_implementation()->describe_tlb(tlb_index);
tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index);

return tlb_description.has_value() &&
address_in_tlb_space(address, size_in_bytes, tlb_index, std::get<1>(tlb_description.value()), target.chip);
return address_in_tlb_space(address, size_in_bytes, tlb_index, tlb_description.size, target.chip);
}

void Cluster::initialize_interprocess_mutexes(int logical_device_id, bool cleanup_mutexes_in_shm) {
Expand Down Expand Up @@ -1090,12 +1089,10 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) {
}

auto tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y));
auto tlb_data = dev->get_architecture_implementation()->describe_tlb(tlb_index);
tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index);

auto [tlb_offset, tlb_size] = tlb_data.value();
auto* base = reinterpret_cast<uint8_t*>(dev->get_pci_device()->bar0_wc);

return tt::Writer(base + tlb_offset, tlb_size);
return tt::Writer(base + tlb_description.tlb_offset, tlb_description.size);
}

tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) {
Expand Down Expand Up @@ -1123,15 +1120,17 @@ void Cluster::write_device_memory(
small_access);

if (is_tlb_mapped(target, address, size_in_bytes)) {
auto tlb_description = dev->get_architecture_implementation()->describe_tlb(
tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(
map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)));
auto [tlb_offset, tlb_size] = tlb_description.value();
if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) {
if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) {
// This is only for Blackhole. If we want to write to DRAM (BAR4 space), we add offset
// to which we write so write_block knows it needs to target BAR4
dev->write_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr);
dev->write_block(
(tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE,
size_in_bytes,
buffer_addr);
} else {
dev->write_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr);
dev->write_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr);
}
} else {
const auto tlb_index = dynamic_tlb_config.at(fallback_tlb);
Expand Down Expand Up @@ -1170,17 +1169,23 @@ void Cluster::read_device_memory(
log_debug(LogSiliconDriver, " tlb_index: {}, tlb_data.has_value(): {}", tlb_index, tlb_data.has_value());

if (is_tlb_mapped(target, address, size_in_bytes)) {
auto tlb_description = dev->get_architecture_implementation()->describe_tlb(
tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(
map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)));
auto [tlb_offset, tlb_size] = tlb_description.value();
if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) {
if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) {
// This is only for Blackhole. If we want to read from DRAM (BAR4 space), we add offset
// from which we read so read_block knows it needs to target BAR4
dev->read_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr);
dev->read_block(
(tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE,
size_in_bytes,
buffer_addr);
} else {
dev->read_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr);
dev->read_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr);
}
log_debug(LogSiliconDriver, " read_block called with tlb_offset: {}, tlb_size: {}", tlb_offset, tlb_size);
log_debug(
LogSiliconDriver,
" read_block called with tlb_offset: {}, tlb_size: {}",
tlb_description.tlb_offset,
tlb_description.size);
} else {
const auto tlb_index = dynamic_tlb_config.at(fallback_tlb);
const scoped_lock<named_mutex> lock(*get_mutex(fallback_tlb, target.chip));
Expand Down Expand Up @@ -1340,19 +1345,27 @@ Cluster::~Cluster() {
}

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) {
if (!is_tlb_mapped(target)) {
return std::nullopt;
}
auto tlb_configuration = get_tlb_configuration(target);
return std::tuple((uint32_t)tlb_configuration.tlb_offset, (uint32_t)tlb_configuration.size);
}

// Returns the architecture-specific TLB configuration for the TLB that is
// mapped to `target`. Asserts (via log_assert) that the target has a mapped
// TLB before looking up its index.
tlb_configuration Cluster::get_tlb_configuration(const tt_cxy_pair& target) {
log_assert(is_tlb_mapped(target), "TLB not mapped for core: {}", target.str());

// .at() is used on both map levels, so an unknown chip or core is not silently inserted.
int tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y));
// NOTE(review): the describe_tlb return below looks like the removed side of
// this diff hunk; the get_tlb_configuration return after it is its
// replacement — confirm against the real source file.
return get_tt_device(target.chip)->get_architecture_implementation()->describe_tlb(tlb_index);
return get_tt_device(target.chip)->get_architecture_implementation()->get_tlb_configuration(tlb_index);
}

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) {
const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
return get_tlb_data_from_target({(size_t)chip, virtual_coord});
}

/**
 * CoreCoord overload: translates `core` on `chip` into the VIRTUAL coordinate
 * system and forwards to the tt_cxy_pair overload.
 *
 * @param chip Chip the core belongs to.
 * @param core Core coordinate to translate.
 * @return The TLB configuration reported by the tt_cxy_pair overload.
 */
tlb_configuration Cluster::get_tlb_configuration(const chip_id_t chip, CoreCoord core) {
    const CoreCoord translated = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
    const auto chip_index = static_cast<size_t>(chip);
    return get_tlb_configuration({chip_index, translated});
}

void Cluster::configure_tlb(
chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
log_assert(
Expand All @@ -1378,7 +1391,7 @@ void Cluster::configure_tlb(
TTDevice* tt_device = get_tt_device(logical_device_id);
tt_device->set_dynamic_tlb(
tlb_index, harvested_coord_translation.at(logical_device_id).at(core), address, ordering);
auto tlb_size = std::get<1>(tt_device->get_architecture_implementation()->describe_tlb(tlb_index).value());
uint64_t tlb_size = tt_device->get_architecture_implementation()->get_tlb_configuration(tlb_index).size;
tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size});
map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index});
}
Expand Down
36 changes: 6 additions & 30 deletions device/grayskull/grayskull_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i
.base = grayskull::DYNAMIC_TLB_16M_BASE,
.cfg_addr = grayskull::DYNAMIC_TLB_16M_CFG_ADDR,
.index_offset = tlb_index - grayskull::TLB_BASE_INDEX_16M,
.tlb_offset = grayskull::DYNAMIC_TLB_16M_BASE +
(tlb_index - grayskull::TLB_BASE_INDEX_16M) * grayskull::DYNAMIC_TLB_16M_SIZE,
.offset = grayskull::TLB_16M_OFFSET,
};
} else if (tlb_index >= grayskull::TLB_BASE_INDEX_2M) {
Expand All @@ -33,6 +35,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i
.base = grayskull::DYNAMIC_TLB_2M_BASE,
.cfg_addr = grayskull::DYNAMIC_TLB_2M_CFG_ADDR,
.index_offset = tlb_index - grayskull::TLB_BASE_INDEX_2M,
.tlb_offset = grayskull::DYNAMIC_TLB_2M_BASE +
(tlb_index - grayskull::TLB_BASE_INDEX_2M) * grayskull::DYNAMIC_TLB_2M_SIZE,
.offset = grayskull::TLB_2M_OFFSET,
};
} else {
Expand All @@ -41,41 +45,13 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i
.base = grayskull::DYNAMIC_TLB_1M_BASE,
.cfg_addr = grayskull::DYNAMIC_TLB_1M_CFG_ADDR,
.index_offset = tlb_index - grayskull::TLB_BASE_INDEX_1M,
.tlb_offset = grayskull::DYNAMIC_TLB_1M_BASE +
(tlb_index - grayskull::TLB_BASE_INDEX_1M) * grayskull::DYNAMIC_TLB_1M_SIZE,
.offset = grayskull::TLB_1M_OFFSET,
};
}
}

/**
 * Maps a TLB index to the (offset, size) pair of its window.
 *
 * The windows are laid out back to back from offset 0 in three groups:
 * 156 windows of 1 MiB, then 10 windows of 2 MiB, then 20 windows of 16 MiB.
 *
 * @param tlb_index TLB index to describe; may be negative.
 * @return (offset, size) for indices in [0, 186), std::nullopt otherwise.
 */
std::optional<std::tuple<std::uint64_t, std::uint64_t>> grayskull_implementation::describe_tlb(
    std::int32_t tlb_index) const {
    // Reject negatives first so the index can be handled as unsigned below.
    if (tlb_index < 0) {
        return std::nullopt;
    }

    using window = std::tuple<std::uint64_t, std::uint64_t>;

    const std::uint64_t size_1m = std::uint64_t{1} << 20;
    const std::uint64_t size_2m = std::uint64_t{1} << 21;
    const std::uint64_t size_16m = std::uint64_t{1} << 24;

    // Exclusive upper index bound of each group.
    const std::uint64_t end_1m = 156;
    const std::uint64_t end_2m = end_1m + 10;
    const std::uint64_t end_16m = end_2m + 20;

    // Each group starts where the previous one ends.
    const std::uint64_t base_2m = end_1m * size_1m;
    const std::uint64_t base_16m = base_2m + (end_2m - end_1m) * size_2m;

    const auto index = static_cast<std::uint64_t>(tlb_index);

    if (index < end_1m) {
        return window{index * size_1m, size_1m};
    }
    if (index < end_2m) {
        return window{base_2m + (index - end_1m) * size_2m, size_2m};
    }
    if (index < end_16m) {
        return window{base_16m + (index - end_2m) * size_16m, size_16m};
    }

    return std::nullopt;
}

std::pair<std::uint64_t, std::uint64_t> grayskull_implementation::get_tlb_data(
std::uint32_t tlb_index, const tlb_data &data) const {
if (tlb_index < grayskull::TLB_COUNT_1M) {
Expand Down
35 changes: 6 additions & 29 deletions device/wormhole/wormhole_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in
.base = wormhole::DYNAMIC_TLB_16M_BASE,
.cfg_addr = wormhole::DYNAMIC_TLB_16M_CFG_ADDR,
.index_offset = tlb_index - wormhole::TLB_BASE_INDEX_16M,
.tlb_offset = wormhole::DYNAMIC_TLB_16M_BASE +
(tlb_index - wormhole::TLB_BASE_INDEX_16M) * wormhole::DYNAMIC_TLB_16M_SIZE,
.offset = wormhole::TLB_16M_OFFSET,
};
} else if (tlb_index >= wormhole::TLB_BASE_INDEX_2M) {
Expand All @@ -39,6 +41,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in
.base = wormhole::DYNAMIC_TLB_2M_BASE,
.cfg_addr = wormhole::DYNAMIC_TLB_2M_CFG_ADDR,
.index_offset = tlb_index - wormhole::TLB_BASE_INDEX_2M,
.tlb_offset = wormhole::DYNAMIC_TLB_2M_BASE +
(tlb_index - wormhole::TLB_BASE_INDEX_2M) * wormhole::DYNAMIC_TLB_2M_SIZE,
.offset = wormhole::TLB_2M_OFFSET,
};
} else {
Expand All @@ -47,40 +51,13 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in
.base = wormhole::DYNAMIC_TLB_1M_BASE,
.cfg_addr = wormhole::DYNAMIC_TLB_1M_CFG_ADDR,
.index_offset = tlb_index - wormhole::TLB_BASE_INDEX_1M,
.tlb_offset = wormhole::DYNAMIC_TLB_1M_BASE +
(tlb_index - wormhole::TLB_BASE_INDEX_1M) * wormhole::DYNAMIC_TLB_1M_SIZE,
.offset = wormhole::TLB_1M_OFFSET,
};
}
}

/**
 * Maps a TLB index to the (offset, size) pair of its window.
 *
 * The windows are laid out back to back from offset 0 in three groups:
 * 156 windows of 1 MiB, then 10 windows of 2 MiB, then 20 windows of 16 MiB.
 *
 * @param tlb_index TLB index to describe; may be negative.
 * @return (offset, size) for indices in [0, 186), std::nullopt otherwise.
 */
std::optional<std::tuple<std::uint64_t, std::uint64_t>> wormhole_implementation::describe_tlb(
    std::int32_t tlb_index) const {
    // Reject negatives first so the index can be handled as unsigned below.
    if (tlb_index < 0) {
        return std::nullopt;
    }

    using window = std::tuple<std::uint64_t, std::uint64_t>;

    const std::uint64_t size_1m = std::uint64_t{1} << 20;
    const std::uint64_t size_2m = std::uint64_t{1} << 21;
    const std::uint64_t size_16m = std::uint64_t{1} << 24;

    // Exclusive upper index bound of each group.
    const std::uint64_t end_1m = 156;
    const std::uint64_t end_2m = end_1m + 10;
    const std::uint64_t end_16m = end_2m + 20;

    // Each group starts where the previous one ends.
    const std::uint64_t base_2m = end_1m * size_1m;
    const std::uint64_t base_16m = base_2m + (end_2m - end_1m) * size_2m;

    const auto index = static_cast<std::uint64_t>(tlb_index);

    if (index < end_1m) {
        return window{index * size_1m, size_1m};
    }
    if (index < end_2m) {
        return window{base_2m + (index - end_1m) * size_2m, size_2m};
    }
    if (index < end_16m) {
        return window{base_16m + (index - end_2m) * size_16m, size_16m};
    }

    return std::nullopt;
}

std::pair<std::uint64_t, std::uint64_t> wormhole_implementation::get_tlb_data(
std::uint32_t tlb_index, const tlb_data &data) const {
std::uint32_t TLB_COUNT_1M = 156;
Expand Down
Loading