Skip to content

Commit

Permalink
Merge branch 'main' into brosko/logical_mutex
Browse files Browse the repository at this point in the history
  • Loading branch information
broskoTT authored Dec 18, 2024
2 parents 2fb24e0 + e454c69 commit 86d9432
Show file tree
Hide file tree
Showing 18 changed files with 114 additions and 93 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/build-and-run-all-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@ jobs:
{arch: wormhole_b0},
{arch: blackhole},
]
ubuntu-version: [
'ubuntu-22.04',
'ubuntu-20.04',
]
uses: ./.github/workflows/build-tests.yml
with:
arch: ${{ matrix.test-group.arch}}
ubuntu-version: ${{ matrix.ubuntu-version}}
timeout: 15

test-all:
Expand All @@ -39,8 +44,14 @@ jobs:
# Enable once we have functional cards.
# {arch: blackhole},
]
ubuntu-version: [
# Running tests on ubuntu-20.04 alone should be sufficient; skipping ubuntu-22.04 reduces the load on CI.
# 'ubuntu-22.04',
'ubuntu-20.04',
]
uses: ./.github/workflows/run-tests.yml
with:
arch: ${{ matrix.test-group.arch}}
ubuntu-version: ${{ matrix.ubuntu-version}}
card: ${{ matrix.test-group.card}}
timeout: ${{ matrix.test-group.timeout}}
25 changes: 14 additions & 11 deletions .github/workflows/build-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ on:
arch:
required: true
type: string
ubuntu-version:
required: true
type: string
timeout:
required: true
type: number
Expand All @@ -21,6 +24,13 @@ on:
- grayskull
- wormhole_b0
- blackhole
ubuntu-version:
required: true
description: 'The version of Ubuntu to build on'
type: choice
options:
- ubuntu-22.04
- ubuntu-20.04
timeout:
required: true
description: 'The timeout for the build job in minutes'
Expand All @@ -40,18 +50,11 @@ jobs:
build:
# Due to a parsing bug, fromJSON is used to convert the string to a number
timeout-minutes: ${{ fromJSON(inputs.timeout) }}
strategy:
fail-fast: false
matrix:
build: [
{runs-on: ubuntu-22.04, docker-image: tt-umd-ci-ubuntu-22.04},
{runs-on: ubuntu-20.04, docker-image: tt-umd-ci-ubuntu-20.04},
]

name: Build umd_tests for ${{ inputs.arch }} on ${{ matrix.build.runs-on }}
runs-on: ${{ matrix.build.runs-on }}
name: Build umd_tests for ${{ inputs.arch }} on ${{ inputs.ubuntu-version }}
runs-on: ${{ inputs.ubuntu-version }}
container:
image: ghcr.io/${{ github.repository }}/${{ matrix.build.docker-image }}:latest
image: ghcr.io/${{ github.repository }}/tt-umd-ci-${{ inputs.ubuntu-version }}:latest
options: --user root

env:
Expand Down Expand Up @@ -84,5 +87,5 @@ jobs:
- name: Upload build artifacts archive
uses: actions/upload-artifact@v4
with:
name: build-artifacts-${{ inputs.arch }}-${{ matrix.build.runs-on }}
name: build-artifacts-${{ inputs.arch }}-${{ inputs.ubuntu-version }}
path: artifact.tar
23 changes: 13 additions & 10 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ on:
arch:
required: true
type: string
ubuntu-version:
required: true
type: string
card:
required: true
type: string
Expand All @@ -23,6 +26,13 @@ on:
- grayskull
- wormhole_b0
- blackhole
ubuntu-version:
required: true
description: 'The version of Ubuntu to build on'
type: choice
options:
- ubuntu-22.04
- ubuntu-20.04
card:
required: true
description: 'The card to run tests on'
Expand All @@ -47,20 +57,13 @@ jobs:
test:
# Due to a parsing bug, fromJSON is used to convert the string to a number
timeout-minutes: ${{ fromJSON(inputs.timeout) }}
strategy:
fail-fast: false
matrix:
build: [
{runs-on: ubuntu-22.04, docker-image: tt-umd-ci-ubuntu-22.04},
{runs-on: ubuntu-20.04, docker-image: tt-umd-ci-ubuntu-20.04},
]

name: Run tests for ${{ inputs.arch }} on ${{ inputs.card }} on ${{ matrix.build.runs-on }}
name: Run tests for ${{ inputs.arch }} on ${{ inputs.card }} on ${{ inputs.ubuntu-version }}
runs-on:
- self-hosted
- ${{ inputs.card }}
container:
image: ghcr.io/${{ github.repository }}/${{ matrix.build.docker-image }}:latest
image: ghcr.io/${{ github.repository }}/tt-umd-ci-${{ inputs.ubuntu-version }}:latest
options: --user root --device /dev/tenstorrent/0
volumes:
- /dev/hugepages:/dev/hugepages
Expand All @@ -83,7 +86,7 @@ jobs:
- name: Use build artifacts
uses: actions/download-artifact@v4
with:
name: build-artifacts-${{ inputs.arch }}-${{ matrix.build.runs-on }}
name: build-artifacts-${{ inputs.arch }}-${{ inputs.ubuntu-version }}
path: ./

# This is needed to preserve file permissions
Expand Down
2 changes: 1 addition & 1 deletion device/api/umd/device/architecture_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
#include <tuple>
#include <vector>

#include "umd/device/tlb.h"
#include "umd/device/tt_xy_pair.h"
#include "umd/device/types/arch.h"
#include "umd/device/types/tlb.h"
#include "umd/device/types/xy_pair.h"

struct tt_device_l1_address_params;
Expand Down
2 changes: 1 addition & 1 deletion device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <stdexcept>

#include "umd/device/architecture_implementation.h"
#include "umd/device/tlb.h"
#include "umd/device/types/tlb.h"

namespace tt::umd {

Expand Down
3 changes: 1 addition & 2 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
#include "tt_soc_descriptor.h"
#include "tt_xy_pair.h"
#include "umd/device/chip/chip.h"
#include "umd/device/tlb.h"
#include "umd/device/tt_device/tt_device.h"
#include "umd/device/tt_io.hpp"
#include "umd/device/types/arch.h"
#include "umd/device/types/cluster_descriptor_types.h"
#include "umd/device/types/cluster_types.h"
#include "umd/device/types/tlb.h"

using TLB_DATA = tt::umd::tlb_data;

Expand Down Expand Up @@ -609,7 +609,6 @@ class Cluster : public tt_device {
static void harvest_rows_in_soc_descriptor(tt::ARCH arch, tt_SocDescriptor& sdesc, uint32_t harvested_rows);
static std::unordered_map<tt_xy_pair, tt_xy_pair> create_harvested_coord_translation(
const tt::ARCH arch, bool identity_map);
std::unordered_map<tt_xy_pair, tt_xy_pair> get_harvested_coord_translation_map(chip_id_t logical_device_id);
virtual std::uint32_t get_num_dram_channels(std::uint32_t device_id);
virtual std::uint64_t get_dram_channel_size(std::uint32_t device_id, std::uint32_t channel);
virtual std::uint32_t get_num_host_channels(std::uint32_t device_id);
Expand Down
2 changes: 1 addition & 1 deletion device/api/umd/device/grayskull_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <array>

#include "architecture_implementation.h"
#include "umd/device/tlb.h"
#include "umd/device/types/tlb.h"

namespace tt::umd {

Expand Down
2 changes: 1 addition & 1 deletion device/api/umd/device/pci_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

#include "fmt/format.h"
#include "umd/device/semver.hpp"
#include "umd/device/tlb.h"
#include "umd/device/tt_xy_pair.h"
#include "umd/device/types/arch.h"
#include "umd/device/types/tlb.h"

namespace tt::umd {
class semver_t;
Expand Down
14 changes: 8 additions & 6 deletions device/api/umd/device/tt_cluster_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ class Node;
}

enum BoardType : uint32_t {
N150 = 0,
N300 = 1,
E150 = 2,
P150A = 3,
GALAXY = 4,
UNKNOWN = 5,
E75 = 0,
E150 = 1,
E300 = 2,
N150 = 3,
N300 = 4,
P150A = 5,
GALAXY = 6,
UNKNOWN = 7,
};

class tt_ClusterDescriptor {
Expand Down
3 changes: 0 additions & 3 deletions device/api/umd/device/tt_device/tt_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,15 @@ class TTDevice {
tt_xy_pair end,
std::uint64_t address,
bool multicast,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering);
dynamic_tlb set_dynamic_tlb(
unsigned int tlb_index,
tt_xy_pair target,
std::uint64_t address,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
std::uint64_t ordering = tt::umd::tlb_data::Relaxed);
dynamic_tlb set_dynamic_tlb_broadcast(
unsigned int tlb_index,
std::uint64_t address,
std::unordered_map<tt_xy_pair, tt_xy_pair> &harvested_coord_translation,
tt_xy_pair start,
tt_xy_pair end,
std::uint64_t ordering = tt::umd::tlb_data::Relaxed);
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion device/api/umd/device/wormhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <array>

#include "architecture_implementation.h"
#include "umd/device/tlb.h"
#include "umd/device/types/tlb.h"

namespace tt::umd {

Expand Down
50 changes: 20 additions & 30 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@
#include "umd/device/chip/remote_chip.h"
#include "umd/device/driver_atomics.h"
#include "umd/device/hugepage.h"
#include "umd/device/tlb.h"
#include "umd/device/tt_cluster_descriptor.h"
#include "umd/device/tt_core_coordinates.h"
#include "umd/device/tt_soc_descriptor.h"
#include "umd/device/types/arch.h"
#include "umd/device/types/tlb.h"
#include "yaml-cpp/yaml.h"

using namespace boost::interprocess;
Expand Down Expand Up @@ -299,10 +299,6 @@ void Cluster::create_device(

bool Cluster::using_harvested_soc_descriptors() { return perform_harvesting_on_sdesc && performed_harvesting; }

std::unordered_map<tt_xy_pair, tt_xy_pair> Cluster::get_harvested_coord_translation_map(chip_id_t logical_device_id) {
return harvested_coord_translation.at(logical_device_id);
}

std::unordered_map<chip_id_t, uint32_t> Cluster::get_harvesting_masks_for_soc_descriptors() {
if (using_harvested_soc_descriptors()) {
return harvested_rows_per_target;
Expand Down Expand Up @@ -962,11 +958,10 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo
auto [soft_reset_reg, _] = tt_device->set_dynamic_tlb_broadcast(
architecture_implementation->get_reg_tlb(),
architecture_implementation->get_tensix_soft_reset_addr(),
harvested_coord_translation.at(chip_id),
tt_xy_pair(0, 0),
tt_xy_pair(
harvested_coord_translation.at(chip_id).at(tt_xy_pair(0, 0)),
harvested_coord_translation.at(chip_id).at(tt_xy_pair(
architecture_implementation->get_grid_size_x() - 1,
architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id)),
architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id))),
TLB_DATA::Posted);
tt_device->write_regs(soft_reset_reg, 1, &valid);
tt_driver_atomics::sfence();
Expand Down Expand Up @@ -1120,9 +1115,8 @@ void Cluster::write_device_memory(
while (size_in_bytes > 0) {
auto [mapped_address, tlb_size] = dev->set_dynamic_tlb(
tlb_index,
target,
harvested_coord_translation.at(target.chip).at(target),
address,
harvested_coord_translation.at(target.chip),
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
dev->write_block(mapped_address, transfer_size, buffer_addr);
Expand Down Expand Up @@ -1169,9 +1163,8 @@ void Cluster::read_device_memory(
while (size_in_bytes > 0) {
auto [mapped_address, tlb_size] = dev->set_dynamic_tlb(
tlb_index,
target,
harvested_coord_translation.at(target.chip).at(target),
address,
harvested_coord_translation.at(target.chip),
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
dev->read_block(mapped_address, transfer_size, buffer_addr);
Expand Down Expand Up @@ -1353,7 +1346,8 @@ void Cluster::configure_tlb(
tlb_index);

TTDevice* tt_device = get_tt_device(logical_device_id);
tt_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering);
tt_device->set_dynamic_tlb(
tlb_index, harvested_coord_translation.at(logical_device_id).at(core), address, ordering);
auto tlb_size = std::get<1>(tt_device->get_architecture_implementation()->describe_tlb(tlb_index).value());
tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size});
map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index});
Expand Down Expand Up @@ -1447,9 +1441,8 @@ int Cluster::test_setup_interface() {
uint32_t mapped_reg = tt_device
->set_dynamic_tlb(
tt_device->get_architecture_implementation()->get_reg_tlb(),
tt_xy_pair(0, 0),
0xffb20108,
harvested_coord_translation.at(chip_id))
harvested_coord_translation.at(chip_id).at(tt_xy_pair(0, 0)),
0xffb20108)
.bar_offset;

uint32_t regval = 0;
Expand All @@ -1460,9 +1453,8 @@ int Cluster::test_setup_interface() {
uint32_t mapped_reg = tt_device
->set_dynamic_tlb(
tt_device->get_architecture_implementation()->get_reg_tlb(),
tt_xy_pair(1, 0),
0xffb20108,
harvested_coord_translation.at(chip_id))
harvested_coord_translation.at(chip_id).at(tt_xy_pair(1, 0)),
0xffb20108)
.bar_offset;

uint32_t regval = 0;
Expand All @@ -1475,9 +1467,8 @@ int Cluster::test_setup_interface() {
// uint32_t mapped_reg = tt_device
// ->set_dynamic_tlb(
// tt_device->get_architecture_implementation()->get_reg_tlb(),
// tt_xy_pair(1, 0),
// 0xffb20108,
// harvested_coord_translation.at(logical_device_id))
// harvested_coord_translation.at(chip_id).at(tt_xy_pair(1, 0)),
// 0xffb20108)
// .bar_offset;

// uint32_t regval = 0;
Expand Down Expand Up @@ -2523,9 +2514,8 @@ void Cluster::pcie_broadcast_write(
auto [mapped_address, tlb_size] = tt_device->set_dynamic_tlb_broadcast(
tlb_index,
addr,
harvested_coord_translation.at(chip),
start,
end,
harvested_coord_translation.at(chip).at(start),
harvested_coord_translation.at(chip).at(end),
dynamic_tlb_ordering_modes.at(fallback_tlb));
uint64_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size);
tt_device->write_block(mapped_address, transfer_size, buffer_addr);
Expand Down Expand Up @@ -3005,8 +2995,8 @@ void Cluster::read_mmio_device_register(
const scoped_lock<named_mutex> lock(*get_mutex(fallback_tlb, core.chip));
log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index);

auto [mapped_address, tlb_size] =
tt_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict);
auto [mapped_address, tlb_size] = tt_device->set_dynamic_tlb(
tlb_index, harvested_coord_translation.at(core.chip).at(core), addr, TLB_DATA::Strict);
// Align block to 4 bytes if needed.
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size);
tt_device->read_regs(mapped_address, aligned_buf.block_size / sizeof(std::uint32_t), aligned_buf.local_storage);
Expand All @@ -3025,8 +3015,8 @@ void Cluster::write_mmio_device_register(
const scoped_lock<named_mutex> lock(*get_mutex(fallback_tlb, core.chip));
log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index);

auto [mapped_address, tlb_size] =
tt_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict);
auto [mapped_address, tlb_size] = tt_device->set_dynamic_tlb(
tlb_index, harvested_coord_translation.at(core.chip).at(core), addr, TLB_DATA::Strict);
// Align block to 4 bytes if needed.
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size);
if (aligned_buf.input_size != aligned_buf.block_size) {
Expand Down
2 changes: 1 addition & 1 deletion device/tlb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: Apache-2.0

#include "umd/device/tlb.h"
#include "umd/device/types/tlb.h"

namespace tt::umd {

Expand Down
Loading

0 comments on commit 86d9432

Please sign in to comment.