diff --git a/.github/workflows/build-and-run-all-tests.yml b/.github/workflows/build-and-run-all-tests.yml index ff75e6c2..1469f5f8 100644 --- a/.github/workflows/build-and-run-all-tests.yml +++ b/.github/workflows/build-and-run-all-tests.yml @@ -19,9 +19,14 @@ jobs: {arch: wormhole_b0}, {arch: blackhole}, ] + ubuntu-version: [ + 'ubuntu-22.04', + 'ubuntu-20.04', + ] uses: ./.github/workflows/build-tests.yml with: arch: ${{ matrix.test-group.arch}} + ubuntu-version: ${{ matrix.ubuntu-version}} timeout: 15 test-all: @@ -39,8 +44,14 @@ jobs: # Enable once we have functional cards. # {arch: blackhole}, ] + ubuntu-version: [ + # Running tests on ubuntu-20.04 should be sufficient. Reduce load on CI. + # 'ubuntu-22.04', + 'ubuntu-20.04', + ] uses: ./.github/workflows/run-tests.yml with: arch: ${{ matrix.test-group.arch}} + ubuntu-version: ${{ matrix.ubuntu-version}} card: ${{ matrix.test-group.card}} timeout: ${{ matrix.test-group.timeout}} diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml index bff1d0af..ad5e53ac 100644 --- a/.github/workflows/build-tests.yml +++ b/.github/workflows/build-tests.yml @@ -8,6 +8,9 @@ on: arch: required: true type: string + ubuntu-version: + required: true + type: string timeout: required: true type: number @@ -21,6 +24,13 @@ on: - grayskull - wormhole_b0 - blackhole + ubuntu-version: + required: true + description: 'The version of Ubuntu to build on' + type: choice + options: + - ubuntu-22.04 + - ubuntu-20.04 timeout: required: true description: 'The timeout for the build job in minutes' @@ -40,18 +50,11 @@ jobs: build: # Due to parsing bug, fromJSON is used to convert string to number timeout-minutes: ${{ fromJSON(inputs.timeout) }} - strategy: - fail-fast: false - matrix: - build: [ - {runs-on: ubuntu-22.04, docker-image: tt-umd-ci-ubuntu-22.04}, - {runs-on: ubuntu-20.04, docker-image: tt-umd-ci-ubuntu-20.04}, - ] - name: Build umd_tests for ${{ inputs.arch }} on ${{ matrix.build.runs-on }} - runs-on: ${{ 
matrix.build.runs-on }} + name: Build umd_tests for ${{ inputs.arch }} on ${{ inputs.ubuntu-version }} + runs-on: ${{ inputs.ubuntu-version }} container: - image: ghcr.io/${{ github.repository }}/${{ matrix.build.docker-image }}:latest + image: ghcr.io/${{ github.repository }}/tt-umd-ci-${{ inputs.ubuntu-version }}:latest options: --user root env: @@ -84,5 +87,5 @@ jobs: - name: Upload build artifacts archive uses: actions/upload-artifact@v4 with: - name: build-artifacts-${{ inputs.arch }}-${{ matrix.build.runs-on }} + name: build-artifacts-${{ inputs.arch }}-${{ inputs.ubuntu-version }} path: artifact.tar diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index e9935dcd..4215dd51 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -7,6 +7,9 @@ on: arch: required: true type: string + ubuntu-version: + required: true + type: string card: required: true type: string @@ -23,6 +26,13 @@ on: - grayskull - wormhole_b0 - blackhole + ubuntu-version: + required: true + description: 'The version of Ubuntu to build on' + type: choice + options: + - ubuntu-22.04 + - ubuntu-20.04 card: required: true description: 'The card to run tests on' @@ -47,20 +57,13 @@ jobs: test: # Due to parsing bug, fromJSON is used to convert string to number timeout-minutes: ${{ fromJSON(inputs.timeout) }} - strategy: - fail-fast: false - matrix: - build: [ - {runs-on: ubuntu-22.04, docker-image: tt-umd-ci-ubuntu-22.04}, - {runs-on: ubuntu-20.04, docker-image: tt-umd-ci-ubuntu-20.04}, - ] - name: Run tests for ${{ inputs.arch }} on ${{ inputs.card }} on ${{ matrix.build.runs-on }} + name: Run tests for ${{ inputs.arch }} on ${{ inputs.card }} on ${{ inputs.ubuntu-version }} runs-on: - self-hosted - ${{ inputs.card }} container: - image: ghcr.io/${{ github.repository }}/${{ matrix.build.docker-image }}:latest + image: ghcr.io/${{ github.repository }}/tt-umd-ci-${{ inputs.ubuntu-version }}:latest options: --user root --device 
/dev/tenstorrent/0 volumes: - /dev/hugepages:/dev/hugepages @@ -83,7 +86,7 @@ jobs: - name: Use build artifacts uses: actions/download-artifact@v4 with: - name: build-artifacts-${{ inputs.arch }}-${{ matrix.build.runs-on }} + name: build-artifacts-${{ inputs.arch }}-${{ inputs.ubuntu-version }} path: ./ # This is needed to preserve file permissions diff --git a/device/api/umd/device/architecture_implementation.h b/device/api/umd/device/architecture_implementation.h index bfd1c36b..8935621f 100644 --- a/device/api/umd/device/architecture_implementation.h +++ b/device/api/umd/device/architecture_implementation.h @@ -11,9 +11,9 @@ #include #include -#include "umd/device/tlb.h" #include "umd/device/tt_xy_pair.h" #include "umd/device/types/arch.h" +#include "umd/device/types/tlb.h" #include "umd/device/types/xy_pair.h" struct tt_device_l1_address_params; diff --git a/device/api/umd/device/blackhole_implementation.h b/device/api/umd/device/blackhole_implementation.h index a80b4fd6..b248bcfa 100644 --- a/device/api/umd/device/blackhole_implementation.h +++ b/device/api/umd/device/blackhole_implementation.h @@ -10,7 +10,7 @@ #include #include "umd/device/architecture_implementation.h" -#include "umd/device/tlb.h" +#include "umd/device/types/tlb.h" namespace tt::umd { diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 9f9100ff..a8a80f26 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -18,12 +18,12 @@ #include "tt_soc_descriptor.h" #include "tt_xy_pair.h" #include "umd/device/chip/chip.h" -#include "umd/device/tlb.h" #include "umd/device/tt_device/tt_device.h" #include "umd/device/tt_io.hpp" #include "umd/device/types/arch.h" #include "umd/device/types/cluster_descriptor_types.h" #include "umd/device/types/cluster_types.h" +#include "umd/device/types/tlb.h" using TLB_DATA = tt::umd::tlb_data; @@ -609,7 +609,6 @@ class Cluster : public tt_device { static void harvest_rows_in_soc_descriptor(tt::ARCH arch, 
tt_SocDescriptor& sdesc, uint32_t harvested_rows); static std::unordered_map create_harvested_coord_translation( const tt::ARCH arch, bool identity_map); - std::unordered_map get_harvested_coord_translation_map(chip_id_t logical_device_id); virtual std::uint32_t get_num_dram_channels(std::uint32_t device_id); virtual std::uint64_t get_dram_channel_size(std::uint32_t device_id, std::uint32_t channel); virtual std::uint32_t get_num_host_channels(std::uint32_t device_id); diff --git a/device/api/umd/device/grayskull_implementation.h b/device/api/umd/device/grayskull_implementation.h index e7c9ed42..ab33f15d 100644 --- a/device/api/umd/device/grayskull_implementation.h +++ b/device/api/umd/device/grayskull_implementation.h @@ -9,7 +9,7 @@ #include #include "architecture_implementation.h" -#include "umd/device/tlb.h" +#include "umd/device/types/tlb.h" namespace tt::umd { diff --git a/device/api/umd/device/pci_device.hpp b/device/api/umd/device/pci_device.hpp index ce4b6392..edad710e 100644 --- a/device/api/umd/device/pci_device.hpp +++ b/device/api/umd/device/pci_device.hpp @@ -14,9 +14,9 @@ #include "fmt/format.h" #include "umd/device/semver.hpp" -#include "umd/device/tlb.h" #include "umd/device/tt_xy_pair.h" #include "umd/device/types/arch.h" +#include "umd/device/types/tlb.h" namespace tt::umd { class semver_t; diff --git a/device/api/umd/device/tt_cluster_descriptor.h b/device/api/umd/device/tt_cluster_descriptor.h index b7de3fdd..ef83051f 100644 --- a/device/api/umd/device/tt_cluster_descriptor.h +++ b/device/api/umd/device/tt_cluster_descriptor.h @@ -25,12 +25,14 @@ class Node; } enum BoardType : uint32_t { - N150 = 0, - N300 = 1, - E150 = 2, - P150A = 3, - GALAXY = 4, - UNKNOWN = 5, + E75 = 0, + E150 = 1, + E300 = 2, + N150 = 3, + N300 = 4, + P150A = 5, + GALAXY = 6, + UNKNOWN = 7, }; class tt_ClusterDescriptor { diff --git a/device/api/umd/device/tt_device/tt_device.h b/device/api/umd/device/tt_device/tt_device.h index 056d0f08..9a35c8d8 100644 --- 
a/device/api/umd/device/tt_device/tt_device.h +++ b/device/api/umd/device/tt_device/tt_device.h @@ -69,18 +69,15 @@ class TTDevice { tt_xy_pair end, std::uint64_t address, bool multicast, - std::unordered_map &harvested_coord_translation, std::uint64_t ordering); dynamic_tlb set_dynamic_tlb( unsigned int tlb_index, tt_xy_pair target, std::uint64_t address, - std::unordered_map &harvested_coord_translation, std::uint64_t ordering = tt::umd::tlb_data::Relaxed); dynamic_tlb set_dynamic_tlb_broadcast( unsigned int tlb_index, std::uint64_t address, - std::unordered_map &harvested_coord_translation, tt_xy_pair start, tt_xy_pair end, std::uint64_t ordering = tt::umd::tlb_data::Relaxed); diff --git a/device/api/umd/device/tlb.h b/device/api/umd/device/types/tlb.h similarity index 100% rename from device/api/umd/device/tlb.h rename to device/api/umd/device/types/tlb.h diff --git a/device/api/umd/device/wormhole_implementation.h b/device/api/umd/device/wormhole_implementation.h index 375cf6ad..ce1bf036 100644 --- a/device/api/umd/device/wormhole_implementation.h +++ b/device/api/umd/device/wormhole_implementation.h @@ -9,7 +9,7 @@ #include #include "architecture_implementation.h" -#include "umd/device/tlb.h" +#include "umd/device/types/tlb.h" namespace tt::umd { diff --git a/device/cluster.cpp b/device/cluster.cpp index 6ceab8bd..125cc2c5 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -43,11 +43,11 @@ #include "umd/device/chip/remote_chip.h" #include "umd/device/driver_atomics.h" #include "umd/device/hugepage.h" -#include "umd/device/tlb.h" #include "umd/device/tt_cluster_descriptor.h" #include "umd/device/tt_core_coordinates.h" #include "umd/device/tt_soc_descriptor.h" #include "umd/device/types/arch.h" +#include "umd/device/types/tlb.h" #include "yaml-cpp/yaml.h" using namespace boost::interprocess; @@ -299,10 +299,6 @@ void Cluster::create_device( bool Cluster::using_harvested_soc_descriptors() { return perform_harvesting_on_sdesc && performed_harvesting; } 
-std::unordered_map Cluster::get_harvested_coord_translation_map(chip_id_t logical_device_id) { - return harvested_coord_translation.at(logical_device_id); -} - std::unordered_map Cluster::get_harvesting_masks_for_soc_descriptors() { if (using_harvested_soc_descriptors()) { return harvested_rows_per_target; @@ -962,11 +958,10 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo auto [soft_reset_reg, _] = tt_device->set_dynamic_tlb_broadcast( architecture_implementation->get_reg_tlb(), architecture_implementation->get_tensix_soft_reset_addr(), - harvested_coord_translation.at(chip_id), - tt_xy_pair(0, 0), - tt_xy_pair( + harvested_coord_translation.at(chip_id).at(tt_xy_pair(0, 0)), + harvested_coord_translation.at(chip_id).at(tt_xy_pair( architecture_implementation->get_grid_size_x() - 1, - architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id)), + architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id))), TLB_DATA::Posted); tt_device->write_regs(soft_reset_reg, 1, &valid); tt_driver_atomics::sfence(); @@ -1120,9 +1115,8 @@ void Cluster::write_device_memory( while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = dev->set_dynamic_tlb( tlb_index, - target, + harvested_coord_translation.at(target.chip).at(target), address, - harvested_coord_translation.at(target.chip), dynamic_tlb_ordering_modes.at(fallback_tlb)); uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); dev->write_block(mapped_address, transfer_size, buffer_addr); @@ -1169,9 +1163,8 @@ void Cluster::read_device_memory( while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = dev->set_dynamic_tlb( tlb_index, - target, + harvested_coord_translation.at(target.chip).at(target), address, - harvested_coord_translation.at(target.chip), dynamic_tlb_ordering_modes.at(fallback_tlb)); uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); dev->read_block(mapped_address, transfer_size, 
buffer_addr); @@ -1353,7 +1346,8 @@ void Cluster::configure_tlb( tlb_index); TTDevice* tt_device = get_tt_device(logical_device_id); - tt_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering); + tt_device->set_dynamic_tlb( + tlb_index, harvested_coord_translation.at(logical_device_id).at(core), address, ordering); auto tlb_size = std::get<1>(tt_device->get_architecture_implementation()->describe_tlb(tlb_index).value()); tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size}); map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index}); @@ -1447,9 +1441,8 @@ int Cluster::test_setup_interface() { uint32_t mapped_reg = tt_device ->set_dynamic_tlb( tt_device->get_architecture_implementation()->get_reg_tlb(), - tt_xy_pair(0, 0), - 0xffb20108, - harvested_coord_translation.at(chip_id)) + harvested_coord_translation.at(chip_id).at(tt_xy_pair(0, 0)), + 0xffb20108) .bar_offset; uint32_t regval = 0; @@ -1460,9 +1453,8 @@ int Cluster::test_setup_interface() { uint32_t mapped_reg = tt_device ->set_dynamic_tlb( tt_device->get_architecture_implementation()->get_reg_tlb(), - tt_xy_pair(1, 0), - 0xffb20108, - harvested_coord_translation.at(chip_id)) + harvested_coord_translation.at(chip_id).at(tt_xy_pair(1, 0)), + 0xffb20108) .bar_offset; uint32_t regval = 0; @@ -1475,9 +1467,8 @@ int Cluster::test_setup_interface() { // uint32_t mapped_reg = tt_device // ->set_dynamic_tlb( // tt_device->get_architecture_implementation()->get_reg_tlb(), - // tt_xy_pair(1, 0), - // 0xffb20108, - // harvested_coord_translation.at(logical_device_id)) + // harvested_coord_translation.at(chip_id).at(tt_xy_pair(1, 0)), + // 0xffb20108) // .bar_offset; // uint32_t regval = 0; @@ -2523,9 +2514,8 @@ void Cluster::pcie_broadcast_write( auto [mapped_address, tlb_size] = tt_device->set_dynamic_tlb_broadcast( tlb_index, addr, - harvested_coord_translation.at(chip), - start, - end, + 
harvested_coord_translation.at(chip).at(start), + harvested_coord_translation.at(chip).at(end), dynamic_tlb_ordering_modes.at(fallback_tlb)); uint64_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); tt_device->write_block(mapped_address, transfer_size, buffer_addr); @@ -3005,8 +2995,8 @@ void Cluster::read_mmio_device_register( const scoped_lock lock(*get_mutex(fallback_tlb, core.chip)); log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); - auto [mapped_address, tlb_size] = - tt_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict); + auto [mapped_address, tlb_size] = tt_device->set_dynamic_tlb( + tlb_index, harvested_coord_translation.at(core.chip).at(core), addr, TLB_DATA::Strict); // Align block to 4bytes if needed. auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size); tt_device->read_regs(mapped_address, aligned_buf.block_size / sizeof(std::uint32_t), aligned_buf.local_storage); @@ -3025,8 +3015,8 @@ void Cluster::write_mmio_device_register( const scoped_lock lock(*get_mutex(fallback_tlb, core.chip)); log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); - auto [mapped_address, tlb_size] = - tt_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict); + auto [mapped_address, tlb_size] = tt_device->set_dynamic_tlb( + tlb_index, harvested_coord_translation.at(core.chip).at(core), addr, TLB_DATA::Strict); // Align block to 4bytes if needed. 
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size); if (aligned_buf.input_size != aligned_buf.block_size) { diff --git a/device/tlb.cpp b/device/tlb.cpp index 6168c9ad..14527655 100644 --- a/device/tlb.cpp +++ b/device/tlb.cpp @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "umd/device/tlb.h" +#include "umd/device/types/tlb.h" namespace tt::umd { diff --git a/device/tt_cluster_descriptor.cpp b/device/tt_cluster_descriptor.cpp index bc35ceb9..04b8beb6 100644 --- a/device/tt_cluster_descriptor.cpp +++ b/device/tt_cluster_descriptor.cpp @@ -737,16 +737,20 @@ void tt_ClusterDescriptor::load_chips_from_connectivity_descriptor(YAML::Node &y for (const auto &chip_board_type : yaml["boardtype"].as>()) { auto &chip = chip_board_type.first; BoardType board_type; - if (chip_board_type.second == "n150") { + if (chip_board_type.second == "e75") { + board_type = BoardType::E75; + } else if (chip_board_type.second == "e150") { + board_type = BoardType::E150; + } else if (chip_board_type.second == "e300") { + board_type = BoardType::E300; + } else if (chip_board_type.second == "n150") { board_type = BoardType::N150; } else if (chip_board_type.second == "n300") { board_type = BoardType::N300; - } else if (chip_board_type.second == "GALAXY") { - board_type = BoardType::GALAXY; - } else if (chip_board_type.second == "e150") { - board_type = BoardType::E150; } else if (chip_board_type.second == "p150A") { board_type = BoardType::P150A; + } else if (chip_board_type.second == "GALAXY") { + board_type = BoardType::GALAXY; } else { log_warning( LogSiliconDriver, diff --git a/device/tt_device/tt_device.cpp b/device/tt_device/tt_device.cpp index e4ecca50..8f74bad9 100644 --- a/device/tt_device/tt_device.cpp +++ b/device/tt_device/tt_device.cpp @@ -232,7 +232,6 @@ dynamic_tlb TTDevice::set_dynamic_tlb( tt_xy_pair end, std::uint64_t address, bool multicast, - std::unordered_map &harvested_coord_translation, std::uint64_t ordering) { if (multicast) { std::tie(start, 
end) = architecture_impl_->multicast_workaround(start, end); @@ -253,8 +252,6 @@ dynamic_tlb TTDevice::set_dynamic_tlb( tt::umd::tlb_configuration tlb_config = architecture_impl_->get_tlb_configuration(tlb_index); std::uint32_t TLB_CFG_REG_SIZE_BYTES = architecture_impl_->get_tlb_cfg_reg_size_bytes(); - auto translated_start_coords = harvested_coord_translation.at(start); - auto translated_end_coords = harvested_coord_translation.at(end); uint64_t tlb_address = address / tlb_config.size; uint32_t local_address = address % tlb_config.size; uint64_t tlb_base = tlb_config.base + (tlb_config.size * tlb_config.index_offset); @@ -263,10 +260,10 @@ dynamic_tlb TTDevice::set_dynamic_tlb( std::pair tlb_data = tt::umd::tlb_data{ .local_offset = tlb_address, - .x_end = static_cast(translated_end_coords.x), - .y_end = static_cast(translated_end_coords.y), - .x_start = static_cast(translated_start_coords.x), - .y_start = static_cast(translated_start_coords.y), + .x_end = static_cast(end.x), + .y_end = static_cast(end.y), + .x_start = static_cast(start.x), + .y_start = static_cast(start.y), .mcast = multicast, .ordering = ordering, // TODO #2715: hack for Blackhole A0, will potentially be fixed in B0. 
@@ -291,23 +288,14 @@ dynamic_tlb TTDevice::set_dynamic_tlb( } dynamic_tlb TTDevice::set_dynamic_tlb( - unsigned int tlb_index, - tt_xy_pair target, - std::uint64_t address, - std::unordered_map &harvested_coord_translation, - std::uint64_t ordering) { - return set_dynamic_tlb(tlb_index, tt_xy_pair(0, 0), target, address, false, harvested_coord_translation, ordering); + unsigned int tlb_index, tt_xy_pair target, std::uint64_t address, std::uint64_t ordering) { + return set_dynamic_tlb(tlb_index, tt_xy_pair(0, 0), target, address, false, ordering); } dynamic_tlb TTDevice::set_dynamic_tlb_broadcast( - unsigned int tlb_index, - std::uint64_t address, - std::unordered_map &harvested_coord_translation, - tt_xy_pair start, - tt_xy_pair end, - std::uint64_t ordering) { + unsigned int tlb_index, std::uint64_t address, tt_xy_pair start, tt_xy_pair end, std::uint64_t ordering) { // Issue a broadcast to cores included in the start (top left) and end (bottom right) grid - return set_dynamic_tlb(tlb_index, start, end, address, true, harvested_coord_translation, ordering); + return set_dynamic_tlb(tlb_index, start, end, address, true, ordering); } void TTDevice::configure_iatu_region(size_t region, uint64_t base, uint64_t target, size_t size) { diff --git a/tests/api/cluster_descriptor_examples/grayskull_e75.yaml b/tests/api/cluster_descriptor_examples/grayskull_e75.yaml new file mode 100644 index 00000000..c8e1541f --- /dev/null +++ b/tests/api/cluster_descriptor_examples/grayskull_e75.yaml @@ -0,0 +1,23 @@ +arch: { + 0: Grayskull, +} + +chips: { +} + +ethernet_connections: [ +] + +chips_with_mmio: [ + 0: 0, +] + +# harvest_mask is a bitmask indicating which tensix rows are harvested: bit 0 = first tensix row, bit 1 = second tensix row, etc. +harvesting: { + 0: {noc_translation: false, harvest_mask: 0}, +} + +# This value will be null if the boardtype is unknown. That should never happen in practice, but to be defensive it would be useful to throw an error in this case. 
+boardtype: { + 0: e75, +} \ No newline at end of file diff --git a/tests/api/test_cluster_descriptor.cpp b/tests/api/test_cluster_descriptor.cpp index 68a0a61e..19c0adc6 100644 --- a/tests/api/test_cluster_descriptor.cpp +++ b/tests/api/test_cluster_descriptor.cpp @@ -75,6 +75,7 @@ TEST(ApiClusterDescriptorTest, TestAllOfflineClusterDescriptors) { for (std::string cluster_desc_yaml : { "blackhole_P150.yaml", "galaxy.yaml", + "grayskull_e75.yaml", "grayskull_E150.yaml", "grayskull_E300.yaml", "wormhole_2xN300_unconnected.yaml",