Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
broskoTT committed Dec 18, 2024
1 parent db0f6c8 commit 8986d83
Show file tree
Hide file tree
Showing 10 changed files with 48 additions and 115 deletions.
1 change: 0 additions & 1 deletion device/api/umd/device/architecture_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class architecture_implementation {

virtual std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const = 0;
virtual tlb_configuration get_tlb_configuration(uint32_t tlb_index) const = 0;
virtual std::optional<std::tuple<std::uint64_t, std::uint64_t>> describe_tlb(std::int32_t tlb_index) const = 0;
virtual std::pair<std::uint64_t, std::uint64_t> get_tlb_data(
std::uint32_t tlb_index, const tlb_data& data) const = 0;

Expand Down
1 change: 0 additions & 1 deletion device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,6 @@ class blackhole_implementation : public architecture_implementation {

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::optional<std::tuple<std::uint64_t, std::uint64_t>> describe_tlb(std::int32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;

tt_device_l1_address_params get_l1_address_params() const override;
Expand Down
2 changes: 1 addition & 1 deletion device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ class Cluster : public tt_device {
/**
* Returns the configuration (including the offset and size) of the TLB mapped to the given target core; the core must already have a TLB mapped
*/
std::optional<std::tuple<uint32_t, uint32_t>> get_tlb_data_from_target(const tt_cxy_pair& target);
tlb_configuration get_tlb_data_from_target(const tt_cxy_pair& target);
/**
* This API allows you to write directly to device memory that is addressable by a static TLB
*/
Expand Down
1 change: 0 additions & 1 deletion device/api/umd/device/grayskull_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,6 @@ class grayskull_implementation : public architecture_implementation {

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::optional<std::tuple<std::uint64_t, std::uint64_t>> describe_tlb(std::int32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;

tt_device_l1_address_params get_l1_address_params() const override;
Expand Down
7 changes: 4 additions & 3 deletions device/api/umd/device/types/tlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ struct tlb_data {

// Describes a single TLB window as produced by get_tlb_configuration(tlb_index).
// NOTE(review): this view lists base/cfg_addr/index_offset with both 32-bit and 64-bit types;
// presumably only one set of declarations exists in the real header — confirm against the file.
struct tlb_configuration {
// Size of one TLB window; callers use it as the modulus when mapping an address into the window.
uint64_t size;
// Filled from the per-architecture DYNAMIC_TLB_*_BASE constant of the window class.
uint32_t base;
// Filled from the per-architecture DYNAMIC_TLB_*_CFG_ADDR constant of the window class.
uint32_t cfg_addr;
// Position of this TLB within its window class (tlb_index minus the class's TLB_BASE_INDEX_*).
uint32_t index_offset;
uint64_t base;
uint64_t cfg_addr;
uint64_t index_offset;
// Offset of this specific window: base + index_offset * window size of the class.
uint64_t tlb_offset;
// Filled from the per-architecture TLB_*_OFFSET constant of the window class.
tlb_offsets offset;
};

Expand Down
1 change: 0 additions & 1 deletion device/api/umd/device/wormhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ class wormhole_implementation : public architecture_implementation {

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::optional<std::tuple<std::uint64_t, std::uint64_t>> describe_tlb(std::int32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;

tt_device_l1_address_params get_l1_address_params() const override;
Expand Down
28 changes: 4 additions & 24 deletions device/blackhole/blackhole_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i
.base = blackhole::DYNAMIC_TLB_4G_BASE,
.cfg_addr = blackhole::DYNAMIC_TLB_4G_CFG_ADDR,
.index_offset = tlb_index - blackhole::TLB_BASE_INDEX_4G,
.tlb_offset = blackhole::DYNAMIC_TLB_4G_BASE +
(tlb_index - blackhole::TLB_BASE_INDEX_4G) * blackhole::DYNAMIC_TLB_4G_SIZE,
.offset = blackhole::TLB_4G_OFFSET,
};
}
Expand All @@ -43,34 +45,12 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i
.base = blackhole::DYNAMIC_TLB_2M_BASE,
.cfg_addr = blackhole::DYNAMIC_TLB_2M_CFG_ADDR,
.index_offset = tlb_index - blackhole::TLB_BASE_INDEX_2M,
.tlb_offset = blackhole::DYNAMIC_TLB_2M_BASE +
(tlb_index - blackhole::TLB_BASE_INDEX_2M) * blackhole::DYNAMIC_TLB_2M_SIZE,
.offset = blackhole::TLB_2M_OFFSET,
};
}

/**
 * Maps a TLB index to the (offset, size) pair of the window it names.
 *
 * Indices [0, 202) select 2M windows laid out back to back from offset 0; the following
 * blackhole::TLB_COUNT_4G indices select 4G windows starting at blackhole::TLB_BASE_4G.
 *
 * @param tlb_index Index of the TLB to describe.
 * @return (offset, size) of the window, or std::nullopt when the index is negative or past the last window.
 */
std::optional<std::tuple<std::uint64_t, std::uint64_t>> blackhole_implementation::describe_tlb(
    std::int32_t tlb_index) const {
    // A negative index cannot name a TLB window.
    if (tlb_index < 0) {
        return std::nullopt;
    }

    const std::uint32_t count_2m = 202;
    const auto index = static_cast<std::uint32_t>(tlb_index);

    // 2M windows come first.
    if (index < count_2m) {
        const std::uint64_t window_size = std::uint64_t{1} << 21;
        return std::tuple(index * window_size, window_size);
    }

    // 4G windows follow directly after the 2M windows.
    if (index < count_2m + blackhole::TLB_COUNT_4G) {
        const auto window_size = blackhole::TLB_4G_SIZE;
        const auto slot = index - count_2m;
        return std::tuple(blackhole::TLB_BASE_4G + slot * window_size, window_size);
    }

    return std::nullopt;
}

std::pair<std::uint64_t, std::uint64_t> blackhole_implementation::get_tlb_data(
std::uint32_t tlb_index, const tlb_data& data) const {
if (tlb_index < blackhole::TLB_COUNT_2M) {
Expand Down
51 changes: 27 additions & 24 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,9 @@ bool Cluster::is_tlb_mapped(tt_cxy_pair target, uint64_t address, uint32_t size_
auto* dev = get_tt_device(target.chip);

int32_t tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y));
auto tlb_description = dev->get_architecture_implementation()->describe_tlb(tlb_index);
auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index);

return tlb_description.has_value() &&
address_in_tlb_space(address, size_in_bytes, tlb_index, std::get<1>(tlb_description.value()), target.chip);
return address_in_tlb_space(address, size_in_bytes, tlb_index, tlb_description.size, target.chip);
}

void Cluster::initialize_interprocess_mutexes(int pci_interface_id, bool cleanup_mutexes_in_shm) {
Expand Down Expand Up @@ -1075,12 +1074,10 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) {
}

auto tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y));
auto tlb_data = dev->get_architecture_implementation()->describe_tlb(tlb_index);
auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index);

auto [tlb_offset, tlb_size] = tlb_data.value();
auto* base = reinterpret_cast<uint8_t*>(dev->get_pci_device()->bar0_wc);

return tt::Writer(base + tlb_offset, tlb_size);
return tt::Writer(base + tlb_description.tlb_offset, tlb_description.size);
}

void Cluster::write_device_memory(
Expand All @@ -1103,15 +1100,17 @@ void Cluster::write_device_memory(
small_access);

if (is_tlb_mapped(target, address, size_in_bytes)) {
auto tlb_description = dev->get_architecture_implementation()->describe_tlb(
auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(
map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)));
auto [tlb_offset, tlb_size] = tlb_description.value();
if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) {
if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) {
// This is only for Blackhole. If we want to write to DRAM (BAR4 space), we add offset
// to which we write so write_block knows it needs to target BAR4
dev->write_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr);
dev->write_block(
(tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE,
size_in_bytes,
buffer_addr);
} else {
dev->write_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr);
dev->write_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr);
}
} else {
const auto tlb_index = dynamic_tlb_config.at(fallback_tlb);
Expand Down Expand Up @@ -1151,17 +1150,23 @@ void Cluster::read_device_memory(
log_debug(LogSiliconDriver, " tlb_index: {}, tlb_data.has_value(): {}", tlb_index, tlb_data.has_value());

if (is_tlb_mapped(target, address, size_in_bytes)) {
auto tlb_description = dev->get_architecture_implementation()->describe_tlb(
auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(
map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)));
auto [tlb_offset, tlb_size] = tlb_description.value();
if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) {
if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) {
// This is only for Blackhole. If we want to read from DRAM (BAR4 space), we add offset
// from which we read so read_block knows it needs to target BAR4
dev->read_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr);
dev->read_block(
(tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE,
size_in_bytes,
buffer_addr);
} else {
dev->read_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr);
dev->read_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr);
}
log_debug(LogSiliconDriver, " read_block called with tlb_offset: {}, tlb_size: {}", tlb_offset, tlb_size);
log_debug(
LogSiliconDriver,
" read_block called with tlb_offset: {}, tlb_size: {}",
tlb_description.tlb_offset,
tlb_description.size);
} else {
const auto tlb_index = dynamic_tlb_config.at(fallback_tlb);
const scoped_lock<named_mutex> lock(*get_mutex(fallback_tlb, dev->get_pci_device()->get_device_num()));
Expand Down Expand Up @@ -1321,13 +1326,11 @@ Cluster::~Cluster() {
dynamic_tlb_ordering_modes.clear();
}

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) {
if (!is_tlb_mapped(target)) {
return std::nullopt;
}
tlb_configuration Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) {
log_assert(is_tlb_mapped(target), "TLB not mapped for core: {}", target.str());

int tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y));
return get_tt_device(target.chip)->get_architecture_implementation()->describe_tlb(tlb_index);
return get_tt_device(target.chip)->get_architecture_implementation()->get_tlb_configuration(tlb_index);
}

void Cluster::configure_tlb(
Expand All @@ -1354,7 +1357,7 @@ void Cluster::configure_tlb(

TTDevice* tt_device = get_tt_device(logical_device_id);
tt_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering);
auto tlb_size = std::get<1>(tt_device->get_architecture_implementation()->describe_tlb(tlb_index).value());
auto tlb_size = tt_device->get_architecture_implementation()->get_tlb_configuration(tlb_index).size;
tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size});
map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index});
}
Expand Down
36 changes: 6 additions & 30 deletions device/grayskull/grayskull_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i
.base = grayskull::DYNAMIC_TLB_16M_BASE,
.cfg_addr = grayskull::DYNAMIC_TLB_16M_CFG_ADDR,
.index_offset = tlb_index - grayskull::TLB_BASE_INDEX_16M,
.tlb_offset = grayskull::DYNAMIC_TLB_16M_BASE +
(tlb_index - grayskull::TLB_BASE_INDEX_16M) * grayskull::DYNAMIC_TLB_16M_SIZE,
.offset = grayskull::TLB_16M_OFFSET,
};
} else if (tlb_index >= grayskull::TLB_BASE_INDEX_2M) {
Expand All @@ -33,6 +35,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i
.base = grayskull::DYNAMIC_TLB_2M_BASE,
.cfg_addr = grayskull::DYNAMIC_TLB_2M_CFG_ADDR,
.index_offset = tlb_index - grayskull::TLB_BASE_INDEX_2M,
.tlb_offset = grayskull::DYNAMIC_TLB_2M_BASE +
(tlb_index - grayskull::TLB_BASE_INDEX_2M) * grayskull::DYNAMIC_TLB_2M_SIZE,
.offset = grayskull::TLB_2M_OFFSET,
};
} else {
Expand All @@ -41,41 +45,13 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i
.base = grayskull::DYNAMIC_TLB_1M_BASE,
.cfg_addr = grayskull::DYNAMIC_TLB_1M_CFG_ADDR,
.index_offset = tlb_index - grayskull::TLB_BASE_INDEX_1M,
.tlb_offset = grayskull::DYNAMIC_TLB_1M_BASE +
(tlb_index - grayskull::TLB_BASE_INDEX_1M) * grayskull::DYNAMIC_TLB_1M_SIZE,
.offset = grayskull::TLB_1M_OFFSET,
};
}
}

/**
 * Maps a TLB index to the (offset, size) pair of the window it names.
 *
 * Windows are laid out back to back from offset 0 in three consecutive classes:
 * 156 windows of 1M, then 10 windows of 2M, then 20 windows of 16M.
 *
 * @param tlb_index Index of the TLB to describe.
 * @return (offset, size) of the window, or std::nullopt when the index is negative or past the last window.
 */
std::optional<std::tuple<std::uint64_t, std::uint64_t>> grayskull_implementation::describe_tlb(
    std::int32_t tlb_index) const {
    struct window_class {
        std::uint64_t count;
        std::uint64_t size;
    };

    // Window classes in index order; each class starts where the previous one ends.
    static constexpr window_class classes[] = {
        {156, std::uint64_t{1} << 20},
        {10, std::uint64_t{1} << 21},
        {20, std::uint64_t{1} << 24},
    };

    if (tlb_index < 0) {
        return std::nullopt;
    }

    const auto index = static_cast<std::uint64_t>(tlb_index);
    std::uint64_t first_index = 0;  // index of the first window in the current class
    std::uint64_t class_base = 0;   // offset of the first window in the current class

    for (const auto& cls : classes) {
        if (index < first_index + cls.count) {
            return std::tuple(class_base + (index - first_index) * cls.size, cls.size);
        }
        first_index += cls.count;
        class_base += cls.count * cls.size;
    }

    return std::nullopt;
}

std::pair<std::uint64_t, std::uint64_t> grayskull_implementation::get_tlb_data(
std::uint32_t tlb_index, const tlb_data &data) const {
if (tlb_index < grayskull::TLB_COUNT_1M) {
Expand Down
35 changes: 6 additions & 29 deletions device/wormhole/wormhole_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in
.base = wormhole::DYNAMIC_TLB_16M_BASE,
.cfg_addr = wormhole::DYNAMIC_TLB_16M_CFG_ADDR,
.index_offset = tlb_index - wormhole::TLB_BASE_INDEX_16M,
.tlb_offset = wormhole::DYNAMIC_TLB_16M_BASE +
(tlb_index - wormhole::TLB_BASE_INDEX_16M) * wormhole::DYNAMIC_TLB_16M_SIZE,
.offset = wormhole::TLB_16M_OFFSET,
};
} else if (tlb_index >= wormhole::TLB_BASE_INDEX_2M) {
Expand All @@ -39,6 +41,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in
.base = wormhole::DYNAMIC_TLB_2M_BASE,
.cfg_addr = wormhole::DYNAMIC_TLB_2M_CFG_ADDR,
.index_offset = tlb_index - wormhole::TLB_BASE_INDEX_2M,
.tlb_offset = wormhole::DYNAMIC_TLB_2M_BASE +
(tlb_index - wormhole::TLB_BASE_INDEX_2M) * wormhole::DYNAMIC_TLB_2M_SIZE,
.offset = wormhole::TLB_2M_OFFSET,
};
} else {
Expand All @@ -47,40 +51,13 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in
.base = wormhole::DYNAMIC_TLB_1M_BASE,
.cfg_addr = wormhole::DYNAMIC_TLB_1M_CFG_ADDR,
.index_offset = tlb_index - wormhole::TLB_BASE_INDEX_1M,
.tlb_offset = wormhole::DYNAMIC_TLB_1M_BASE +
(tlb_index - wormhole::TLB_BASE_INDEX_1M) * wormhole::DYNAMIC_TLB_1M_SIZE,
.offset = wormhole::TLB_1M_OFFSET,
};
}
}

/**
 * Maps a TLB index to the (offset, size) pair of the window it names.
 *
 * Windows are laid out back to back from offset 0 in three consecutive classes:
 * 156 windows of 1M, then 10 windows of 2M, then 20 windows of 16M.
 *
 * @param tlb_index Index of the TLB to describe.
 * @return (offset, size) of the window, or std::nullopt when the index is negative or past the last window.
 */
std::optional<std::tuple<std::uint64_t, std::uint64_t>> wormhole_implementation::describe_tlb(
    std::int32_t tlb_index) const {
    struct window_class {
        std::uint64_t count;
        std::uint64_t size;
    };

    // Window classes in index order; each class starts where the previous one ends.
    static constexpr window_class classes[] = {
        {156, std::uint64_t{1} << 20},
        {10, std::uint64_t{1} << 21},
        {20, std::uint64_t{1} << 24},
    };

    if (tlb_index < 0) {
        return std::nullopt;
    }

    const auto index = static_cast<std::uint64_t>(tlb_index);
    std::uint64_t first_index = 0;  // index of the first window in the current class
    std::uint64_t class_base = 0;   // offset of the first window in the current class

    for (const auto& cls : classes) {
        if (index < first_index + cls.count) {
            return std::tuple(class_base + (index - first_index) * cls.size, cls.size);
        }
        first_index += cls.count;
        class_base += cls.count * cls.size;
    }

    return std::nullopt;
}

std::pair<std::uint64_t, std::uint64_t> wormhole_implementation::get_tlb_data(
std::uint32_t tlb_index, const tlb_data &data) const {
std::uint32_t TLB_COUNT_1M = 156;
Expand Down

0 comments on commit 8986d83

Please sign in to comment.