From e157d9b83b1e928d6e38473885db5ef4897199ef Mon Sep 17 00:00:00 2001 From: Pavle Janevski <165378935+pjanevskiTT@users.noreply.github.com> Date: Wed, 18 Dec 2024 12:16:04 +0100 Subject: [PATCH 1/4] Fix warning for grayskull e75 boards (#414) --- device/api/umd/device/tt_cluster_descriptor.h | 14 ++++++----- device/tt_cluster_descriptor.cpp | 14 +++++++---- .../grayskull_e75.yaml | 23 +++++++++++++++++++ tests/api/test_cluster_descriptor.cpp | 1 + 4 files changed, 41 insertions(+), 11 deletions(-) create mode 100644 tests/api/cluster_descriptor_examples/grayskull_e75.yaml diff --git a/device/api/umd/device/tt_cluster_descriptor.h b/device/api/umd/device/tt_cluster_descriptor.h index b7de3fdd..ef83051f 100644 --- a/device/api/umd/device/tt_cluster_descriptor.h +++ b/device/api/umd/device/tt_cluster_descriptor.h @@ -25,12 +25,14 @@ class Node; } enum BoardType : uint32_t { - N150 = 0, - N300 = 1, - E150 = 2, - P150A = 3, - GALAXY = 4, - UNKNOWN = 5, + E75 = 0, + E150 = 1, + E300 = 2, + N150 = 3, + N300 = 4, + P150A = 5, + GALAXY = 6, + UNKNOWN = 7, }; class tt_ClusterDescriptor { diff --git a/device/tt_cluster_descriptor.cpp b/device/tt_cluster_descriptor.cpp index bc35ceb9..04b8beb6 100644 --- a/device/tt_cluster_descriptor.cpp +++ b/device/tt_cluster_descriptor.cpp @@ -737,16 +737,20 @@ void tt_ClusterDescriptor::load_chips_from_connectivity_descriptor(YAML::Node &y for (const auto &chip_board_type : yaml["boardtype"].as>()) { auto &chip = chip_board_type.first; BoardType board_type; - if (chip_board_type.second == "n150") { + if (chip_board_type.second == "e75") { + board_type = BoardType::E75; + } else if (chip_board_type.second == "e150") { + board_type = BoardType::E150; + } else if (chip_board_type.second == "e300") { + board_type = BoardType::E300; + } else if (chip_board_type.second == "n150") { board_type = BoardType::N150; } else if (chip_board_type.second == "n300") { board_type = BoardType::N300; - } else if (chip_board_type.second == "GALAXY") { - 
board_type = BoardType::GALAXY; - } else if (chip_board_type.second == "e150") { - board_type = BoardType::E150; } else if (chip_board_type.second == "p150A") { board_type = BoardType::P150A; + } else if (chip_board_type.second == "GALAXY") { + board_type = BoardType::GALAXY; } else { log_warning( LogSiliconDriver, diff --git a/tests/api/cluster_descriptor_examples/grayskull_e75.yaml b/tests/api/cluster_descriptor_examples/grayskull_e75.yaml new file mode 100644 index 00000000..c8e1541f --- /dev/null +++ b/tests/api/cluster_descriptor_examples/grayskull_e75.yaml @@ -0,0 +1,23 @@ +arch: { + 0: Grayskull, +} + +chips: { +} + +ethernet_connections: [ +] + +chips_with_mmio: [ + 0: 0, +] + +# harvest_mask is the bit indicating which tensix row is harvested. So bit 0 = first tensix row; bit 1 = second tensix row etc... +harvesting: { + 0: {noc_translation: false, harvest_mask: 0}, +} + +# This value will be null if the boardtype is unknown, should never happen in practice but to be defensive it would be useful to throw an error on this case. +boardtype: { + 0: e75, +} \ No newline at end of file diff --git a/tests/api/test_cluster_descriptor.cpp b/tests/api/test_cluster_descriptor.cpp index 68a0a61e..19c0adc6 100644 --- a/tests/api/test_cluster_descriptor.cpp +++ b/tests/api/test_cluster_descriptor.cpp @@ -75,6 +75,7 @@ TEST(ApiClusterDescriptorTest, TestAllOfflineClusterDescriptors) { for (std::string cluster_desc_yaml : { "blackhole_P150.yaml", "galaxy.yaml", + "grayskull_e75.yaml", "grayskull_E150.yaml", "grayskull_E300.yaml", "wormhole_2xN300_unconnected.yaml", From d774668fcc1dfb58fa36e10377404567673fbd3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bojan=20Ro=C5=A1ko?= <156314064+broskoTT@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:08:50 +0100 Subject: [PATCH 2/4] Reduce CI load (#415) ### Issue No issue ### Description We are running same tests on both versions of ubuntu. 
There is no need to think that tests would fail on one of the versions and not the other, and we never saw this case. I still intentionally left building on ubuntu 22 though. I chose ubuntu-20 since this is the same one as used in tt-metal. ### List of the changes - Changed build-tests and run-tests to accept ubuntu version argument - Added required ubuntu versions in build_and_run tests. Build on both 20 and 22, run tests only on 20. - I'll also add these tests as required for PRs after this PR. ### Testing CI tests on this PR ### API Changes There are no API changes in this PR. --- .github/workflows/build-and-run-all-tests.yml | 11 ++++++++ .github/workflows/build-tests.yml | 25 +++++++++++-------- .github/workflows/run-tests.yml | 23 +++++++++-------- 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build-and-run-all-tests.yml b/.github/workflows/build-and-run-all-tests.yml index ff75e6c2..1469f5f8 100644 --- a/.github/workflows/build-and-run-all-tests.yml +++ b/.github/workflows/build-and-run-all-tests.yml @@ -19,9 +19,14 @@ jobs: {arch: wormhole_b0}, {arch: blackhole}, ] + ubuntu-version: [ + 'ubuntu-22.04', + 'ubuntu-20.04', + ] uses: ./.github/workflows/build-tests.yml with: arch: ${{ matrix.test-group.arch}} + ubuntu-version: ${{ matrix.ubuntu-version}} timeout: 15 test-all: @@ -39,8 +44,14 @@ jobs: # Enable once we have functional cards. # {arch: blackhole}, ] + ubuntu-version: [ + # Running tests on ubuntu-20.04 should be sufficient. Reduce load on CI. 
+ # 'ubuntu-22.04', + 'ubuntu-20.04', + ] uses: ./.github/workflows/run-tests.yml with: arch: ${{ matrix.test-group.arch}} + ubuntu-version: ${{ matrix.ubuntu-version}} card: ${{ matrix.test-group.card}} timeout: ${{ matrix.test-group.timeout}} diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml index bff1d0af..ad5e53ac 100644 --- a/.github/workflows/build-tests.yml +++ b/.github/workflows/build-tests.yml @@ -8,6 +8,9 @@ on: arch: required: true type: string + ubuntu-version: + required: true + type: string timeout: required: true type: number @@ -21,6 +24,13 @@ on: - grayskull - wormhole_b0 - blackhole + ubuntu-version: + required: true + description: 'The version of Ubuntu to build on' + type: choice + options: + - ubuntu-22.04 + - ubuntu-20.04 timeout: required: true description: 'The timeout for the build job in minutes' @@ -40,18 +50,11 @@ jobs: build: # Due to parsing bug, fromJSON is used to convert string to number timeout-minutes: ${{ fromJSON(inputs.timeout) }} - strategy: - fail-fast: false - matrix: - build: [ - {runs-on: ubuntu-22.04, docker-image: tt-umd-ci-ubuntu-22.04}, - {runs-on: ubuntu-20.04, docker-image: tt-umd-ci-ubuntu-20.04}, - ] - name: Build umd_tests for ${{ inputs.arch }} on ${{ matrix.build.runs-on }} - runs-on: ${{ matrix.build.runs-on }} + name: Build umd_tests for ${{ inputs.arch }} on ${{ inputs.ubuntu-version }} + runs-on: ${{ inputs.ubuntu-version }} container: - image: ghcr.io/${{ github.repository }}/${{ matrix.build.docker-image }}:latest + image: ghcr.io/${{ github.repository }}/tt-umd-ci-${{ inputs.ubuntu-version }}:latest options: --user root env: @@ -84,5 +87,5 @@ jobs: - name: Upload build artifacts archive uses: actions/upload-artifact@v4 with: - name: build-artifacts-${{ inputs.arch }}-${{ matrix.build.runs-on }} + name: build-artifacts-${{ inputs.arch }}-${{ inputs.ubuntu-version }} path: artifact.tar diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 
e9935dcd..4215dd51 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -7,6 +7,9 @@ on: arch: required: true type: string + ubuntu-version: + required: true + type: string card: required: true type: string @@ -23,6 +26,13 @@ on: - grayskull - wormhole_b0 - blackhole + ubuntu-version: + required: true + description: 'The version of Ubuntu to build on' + type: choice + options: + - ubuntu-22.04 + - ubuntu-20.04 card: required: true description: 'The card to run tests on' @@ -47,20 +57,13 @@ jobs: test: # Due to parsing bug, fromJSON is used to convert string to number timeout-minutes: ${{ fromJSON(inputs.timeout) }} - strategy: - fail-fast: false - matrix: - build: [ - {runs-on: ubuntu-22.04, docker-image: tt-umd-ci-ubuntu-22.04}, - {runs-on: ubuntu-20.04, docker-image: tt-umd-ci-ubuntu-20.04}, - ] - name: Run tests for ${{ inputs.arch }} on ${{ inputs.card }} on ${{ matrix.build.runs-on }} + name: Run tests for ${{ inputs.arch }} on ${{ inputs.card }} on ${{ inputs.ubuntu-version }} runs-on: - self-hosted - ${{ inputs.card }} container: - image: ghcr.io/${{ github.repository }}/${{ matrix.build.docker-image }}:latest + image: ghcr.io/${{ github.repository }}/tt-umd-ci-${{ inputs.ubuntu-version }}:latest options: --user root --device /dev/tenstorrent/0 volumes: - /dev/hugepages:/dev/hugepages @@ -83,7 +86,7 @@ jobs: - name: Use build artifacts uses: actions/download-artifact@v4 with: - name: build-artifacts-${{ inputs.arch }}-${{ matrix.build.runs-on }} + name: build-artifacts-${{ inputs.arch }}-${{ inputs.ubuntu-version }} path: ./ # This is needed to preserve file permissions From db0f6c801284ea628b60cda271011527d4375672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bojan=20Ro=C5=A1ko?= <156314064+broskoTT@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:55:52 +0100 Subject: [PATCH 3/4] Move tlb.h to types (#412) ### Issue Related to #417 ### Description ### List of the changes - Moved tlb.h to types folder, since it holds only types 
### Testing Code builds. ### API Changes There are no API changes in this PR. --- device/api/umd/device/architecture_implementation.h | 2 +- device/api/umd/device/blackhole_implementation.h | 2 +- device/api/umd/device/cluster.h | 2 +- device/api/umd/device/grayskull_implementation.h | 2 +- device/api/umd/device/pci_device.hpp | 2 +- device/api/umd/device/{ => types}/tlb.h | 0 device/api/umd/device/wormhole_implementation.h | 2 +- device/cluster.cpp | 2 +- device/tlb.cpp | 2 +- 9 files changed, 8 insertions(+), 8 deletions(-) rename device/api/umd/device/{ => types}/tlb.h (100%) diff --git a/device/api/umd/device/architecture_implementation.h b/device/api/umd/device/architecture_implementation.h index bfd1c36b..8935621f 100644 --- a/device/api/umd/device/architecture_implementation.h +++ b/device/api/umd/device/architecture_implementation.h @@ -11,9 +11,9 @@ #include #include -#include "umd/device/tlb.h" #include "umd/device/tt_xy_pair.h" #include "umd/device/types/arch.h" +#include "umd/device/types/tlb.h" #include "umd/device/types/xy_pair.h" struct tt_device_l1_address_params; diff --git a/device/api/umd/device/blackhole_implementation.h b/device/api/umd/device/blackhole_implementation.h index a80b4fd6..b248bcfa 100644 --- a/device/api/umd/device/blackhole_implementation.h +++ b/device/api/umd/device/blackhole_implementation.h @@ -10,7 +10,7 @@ #include #include "umd/device/architecture_implementation.h" -#include "umd/device/tlb.h" +#include "umd/device/types/tlb.h" namespace tt::umd { diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 76e96cf7..7bde8beb 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -18,12 +18,12 @@ #include "tt_soc_descriptor.h" #include "tt_xy_pair.h" #include "umd/device/chip/chip.h" -#include "umd/device/tlb.h" #include "umd/device/tt_device/tt_device.h" #include "umd/device/tt_io.hpp" #include "umd/device/types/arch.h" #include 
"umd/device/types/cluster_descriptor_types.h" #include "umd/device/types/cluster_types.h" +#include "umd/device/types/tlb.h" using TLB_DATA = tt::umd::tlb_data; diff --git a/device/api/umd/device/grayskull_implementation.h b/device/api/umd/device/grayskull_implementation.h index e7c9ed42..ab33f15d 100644 --- a/device/api/umd/device/grayskull_implementation.h +++ b/device/api/umd/device/grayskull_implementation.h @@ -9,7 +9,7 @@ #include #include "architecture_implementation.h" -#include "umd/device/tlb.h" +#include "umd/device/types/tlb.h" namespace tt::umd { diff --git a/device/api/umd/device/pci_device.hpp b/device/api/umd/device/pci_device.hpp index ce4b6392..edad710e 100644 --- a/device/api/umd/device/pci_device.hpp +++ b/device/api/umd/device/pci_device.hpp @@ -14,9 +14,9 @@ #include "fmt/format.h" #include "umd/device/semver.hpp" -#include "umd/device/tlb.h" #include "umd/device/tt_xy_pair.h" #include "umd/device/types/arch.h" +#include "umd/device/types/tlb.h" namespace tt::umd { class semver_t; diff --git a/device/api/umd/device/tlb.h b/device/api/umd/device/types/tlb.h similarity index 100% rename from device/api/umd/device/tlb.h rename to device/api/umd/device/types/tlb.h diff --git a/device/api/umd/device/wormhole_implementation.h b/device/api/umd/device/wormhole_implementation.h index 375cf6ad..ce1bf036 100644 --- a/device/api/umd/device/wormhole_implementation.h +++ b/device/api/umd/device/wormhole_implementation.h @@ -9,7 +9,7 @@ #include #include "architecture_implementation.h" -#include "umd/device/tlb.h" +#include "umd/device/types/tlb.h" namespace tt::umd { diff --git a/device/cluster.cpp b/device/cluster.cpp index 543d1e00..6c70f075 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -43,11 +43,11 @@ #include "umd/device/chip/remote_chip.h" #include "umd/device/driver_atomics.h" #include "umd/device/hugepage.h" -#include "umd/device/tlb.h" #include "umd/device/tt_cluster_descriptor.h" #include "umd/device/tt_core_coordinates.h" #include 
"umd/device/tt_soc_descriptor.h" #include "umd/device/types/arch.h" +#include "umd/device/types/tlb.h" #include "yaml-cpp/yaml.h" using namespace boost::interprocess; diff --git a/device/tlb.cpp b/device/tlb.cpp index 6168c9ad..14527655 100644 --- a/device/tlb.cpp +++ b/device/tlb.cpp @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "umd/device/tlb.h" +#include "umd/device/types/tlb.h" namespace tt::umd { From e454c69f23c0f284bf15149cc477dc158f40935e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bojan=20Ro=C5=A1ko?= <156314064+broskoTT@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:04:30 +0100 Subject: [PATCH 4/4] Remove harvested_coords argument from dynamic_tlb (#413) ### Issue Related to #417 ### Description This argument should not be passed down to tlb. This effectively changes the required coordinates for calling set_dynamic_tlb from virtual to physical. Hopefully these will become more apparent after changes regarding introducing CoreCoords everywhere. ### List of the changes - Removed harvested_coord_translation from functions related to set_dynamic_tlb in TTDevice - Pass coords through harvested_coord_translation before sending them to set_dynamic_tlb ### Testing CI tests should cover enough testing. ### API Changes There are no API changes in this PR. 
--- device/api/umd/device/cluster.h | 1 - device/api/umd/device/tt_device/tt_device.h | 3 -- device/cluster.cpp | 48 ++++++++------------- device/tt_device/tt_device.cpp | 28 ++++-------- 4 files changed, 27 insertions(+), 53 deletions(-) diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 7bde8beb..06b8cbdf 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -609,7 +609,6 @@ class Cluster : public tt_device { static void harvest_rows_in_soc_descriptor(tt::ARCH arch, tt_SocDescriptor& sdesc, uint32_t harvested_rows); static std::unordered_map create_harvested_coord_translation( const tt::ARCH arch, bool identity_map); - std::unordered_map get_harvested_coord_translation_map(chip_id_t logical_device_id); virtual std::uint32_t get_num_dram_channels(std::uint32_t device_id); virtual std::uint64_t get_dram_channel_size(std::uint32_t device_id, std::uint32_t channel); virtual std::uint32_t get_num_host_channels(std::uint32_t device_id); diff --git a/device/api/umd/device/tt_device/tt_device.h b/device/api/umd/device/tt_device/tt_device.h index 056d0f08..9a35c8d8 100644 --- a/device/api/umd/device/tt_device/tt_device.h +++ b/device/api/umd/device/tt_device/tt_device.h @@ -69,18 +69,15 @@ class TTDevice { tt_xy_pair end, std::uint64_t address, bool multicast, - std::unordered_map &harvested_coord_translation, std::uint64_t ordering); dynamic_tlb set_dynamic_tlb( unsigned int tlb_index, tt_xy_pair target, std::uint64_t address, - std::unordered_map &harvested_coord_translation, std::uint64_t ordering = tt::umd::tlb_data::Relaxed); dynamic_tlb set_dynamic_tlb_broadcast( unsigned int tlb_index, std::uint64_t address, - std::unordered_map &harvested_coord_translation, tt_xy_pair start, tt_xy_pair end, std::uint64_t ordering = tt::umd::tlb_data::Relaxed); diff --git a/device/cluster.cpp b/device/cluster.cpp index 6c70f075..254e2fe2 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -299,10 +299,6 @@ 
void Cluster::create_device( bool Cluster::using_harvested_soc_descriptors() { return perform_harvesting_on_sdesc && performed_harvesting; } -std::unordered_map Cluster::get_harvested_coord_translation_map(chip_id_t logical_device_id) { - return harvested_coord_translation.at(logical_device_id); -} - std::unordered_map Cluster::get_harvesting_masks_for_soc_descriptors() { if (using_harvested_soc_descriptors()) { return harvested_rows_per_target; @@ -962,11 +958,10 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo auto [soft_reset_reg, _] = tt_device->set_dynamic_tlb_broadcast( architecture_implementation->get_reg_tlb(), architecture_implementation->get_tensix_soft_reset_addr(), - harvested_coord_translation.at(chip_id), - tt_xy_pair(0, 0), - tt_xy_pair( + harvested_coord_translation.at(chip_id).at(tt_xy_pair(0, 0)), + harvested_coord_translation.at(chip_id).at(tt_xy_pair( architecture_implementation->get_grid_size_x() - 1, - architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id)), + architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id))), TLB_DATA::Posted); tt_device->write_regs(soft_reset_reg, 1, &valid); tt_driver_atomics::sfence(); @@ -1120,9 +1115,8 @@ void Cluster::write_device_memory( while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = dev->set_dynamic_tlb( tlb_index, - target, + harvested_coord_translation.at(target.chip).at(target), address, - harvested_coord_translation.at(target.chip), dynamic_tlb_ordering_modes.at(fallback_tlb)); uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); dev->write_block(mapped_address, transfer_size, buffer_addr); @@ -1169,9 +1163,8 @@ void Cluster::read_device_memory( while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = dev->set_dynamic_tlb( tlb_index, - target, + harvested_coord_translation.at(target.chip).at(target), address, - harvested_coord_translation.at(target.chip), 
dynamic_tlb_ordering_modes.at(fallback_tlb)); uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); dev->read_block(mapped_address, transfer_size, buffer_addr); @@ -1353,7 +1346,8 @@ void Cluster::configure_tlb( tlb_index); TTDevice* tt_device = get_tt_device(logical_device_id); - tt_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering); + tt_device->set_dynamic_tlb( + tlb_index, harvested_coord_translation.at(logical_device_id).at(core), address, ordering); auto tlb_size = std::get<1>(tt_device->get_architecture_implementation()->describe_tlb(tlb_index).value()); tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size}); map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index}); @@ -1447,9 +1441,8 @@ int Cluster::test_setup_interface() { uint32_t mapped_reg = tt_device ->set_dynamic_tlb( tt_device->get_architecture_implementation()->get_reg_tlb(), - tt_xy_pair(0, 0), - 0xffb20108, - harvested_coord_translation.at(chip_id)) + harvested_coord_translation.at(chip_id).at(tt_xy_pair(0, 0)), + 0xffb20108) .bar_offset; uint32_t regval = 0; @@ -1460,9 +1453,8 @@ int Cluster::test_setup_interface() { uint32_t mapped_reg = tt_device ->set_dynamic_tlb( tt_device->get_architecture_implementation()->get_reg_tlb(), - tt_xy_pair(1, 0), - 0xffb20108, - harvested_coord_translation.at(chip_id)) + harvested_coord_translation.at(chip_id).at(tt_xy_pair(1, 0)), + 0xffb20108) .bar_offset; uint32_t regval = 0; @@ -1475,9 +1467,8 @@ int Cluster::test_setup_interface() { // uint32_t mapped_reg = tt_device // ->set_dynamic_tlb( // tt_device->get_architecture_implementation()->get_reg_tlb(), - // tt_xy_pair(1, 0), - // 0xffb20108, - // harvested_coord_translation.at(logical_device_id)) + // harvested_coord_translation.at(chip_id).at(tt_xy_pair(1, 0)), + // 0xffb20108) // .bar_offset; // uint32_t regval = 0; @@ -2525,9 +2516,8 @@ void Cluster::pcie_broadcast_write( auto 
[mapped_address, tlb_size] = tt_device->set_dynamic_tlb_broadcast( tlb_index, addr, - harvested_coord_translation.at(chip), - start, - end, + harvested_coord_translation.at(chip).at(start), + harvested_coord_translation.at(chip).at(end), dynamic_tlb_ordering_modes.at(fallback_tlb)); uint64_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); tt_device->write_block(mapped_address, transfer_size, buffer_addr); @@ -3008,8 +2998,8 @@ void Cluster::read_mmio_device_register( const scoped_lock lock(*get_mutex(fallback_tlb, tt_device->get_pci_device()->get_device_num())); log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); - auto [mapped_address, tlb_size] = - tt_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict); + auto [mapped_address, tlb_size] = tt_device->set_dynamic_tlb( + tlb_index, harvested_coord_translation.at(core.chip).at(core), addr, TLB_DATA::Strict); // Align block to 4bytes if needed. auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size); tt_device->read_regs(mapped_address, aligned_buf.block_size / sizeof(std::uint32_t), aligned_buf.local_storage); @@ -3028,8 +3018,8 @@ void Cluster::write_mmio_device_register( const scoped_lock lock(*get_mutex(fallback_tlb, tt_device->get_pci_device()->get_device_num())); log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); - auto [mapped_address, tlb_size] = - tt_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict); + auto [mapped_address, tlb_size] = tt_device->set_dynamic_tlb( + tlb_index, harvested_coord_translation.at(core.chip).at(core), addr, TLB_DATA::Strict); // Align block to 4bytes if needed. 
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size); if (aligned_buf.input_size != aligned_buf.block_size) { diff --git a/device/tt_device/tt_device.cpp b/device/tt_device/tt_device.cpp index e4ecca50..8f74bad9 100644 --- a/device/tt_device/tt_device.cpp +++ b/device/tt_device/tt_device.cpp @@ -232,7 +232,6 @@ dynamic_tlb TTDevice::set_dynamic_tlb( tt_xy_pair end, std::uint64_t address, bool multicast, - std::unordered_map &harvested_coord_translation, std::uint64_t ordering) { if (multicast) { std::tie(start, end) = architecture_impl_->multicast_workaround(start, end); @@ -253,8 +252,6 @@ dynamic_tlb TTDevice::set_dynamic_tlb( tt::umd::tlb_configuration tlb_config = architecture_impl_->get_tlb_configuration(tlb_index); std::uint32_t TLB_CFG_REG_SIZE_BYTES = architecture_impl_->get_tlb_cfg_reg_size_bytes(); - auto translated_start_coords = harvested_coord_translation.at(start); - auto translated_end_coords = harvested_coord_translation.at(end); uint64_t tlb_address = address / tlb_config.size; uint32_t local_address = address % tlb_config.size; uint64_t tlb_base = tlb_config.base + (tlb_config.size * tlb_config.index_offset); @@ -263,10 +260,10 @@ dynamic_tlb TTDevice::set_dynamic_tlb( std::pair tlb_data = tt::umd::tlb_data{ .local_offset = tlb_address, - .x_end = static_cast(translated_end_coords.x), - .y_end = static_cast(translated_end_coords.y), - .x_start = static_cast(translated_start_coords.x), - .y_start = static_cast(translated_start_coords.y), + .x_end = static_cast(end.x), + .y_end = static_cast(end.y), + .x_start = static_cast(start.x), + .y_start = static_cast(start.y), .mcast = multicast, .ordering = ordering, // TODO #2715: hack for Blackhole A0, will potentially be fixed in B0. 
@@ -291,23 +288,14 @@ dynamic_tlb TTDevice::set_dynamic_tlb( } dynamic_tlb TTDevice::set_dynamic_tlb( - unsigned int tlb_index, - tt_xy_pair target, - std::uint64_t address, - std::unordered_map &harvested_coord_translation, - std::uint64_t ordering) { - return set_dynamic_tlb(tlb_index, tt_xy_pair(0, 0), target, address, false, harvested_coord_translation, ordering); + unsigned int tlb_index, tt_xy_pair target, std::uint64_t address, std::uint64_t ordering) { + return set_dynamic_tlb(tlb_index, tt_xy_pair(0, 0), target, address, false, ordering); } dynamic_tlb TTDevice::set_dynamic_tlb_broadcast( - unsigned int tlb_index, - std::uint64_t address, - std::unordered_map &harvested_coord_translation, - tt_xy_pair start, - tt_xy_pair end, - std::uint64_t ordering) { + unsigned int tlb_index, std::uint64_t address, tt_xy_pair start, tt_xy_pair end, std::uint64_t ordering) { // Issue a broadcast to cores included in the start (top left) and end (bottom right) grid - return set_dynamic_tlb(tlb_index, start, end, address, true, harvested_coord_translation, ordering); + return set_dynamic_tlb(tlb_index, start, end, address, true, ordering); } void TTDevice::configure_iatu_region(size_t region, uint64_t base, uint64_t target, size_t size) {