Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable tests that are intermittently failing #365

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 33 additions & 31 deletions tests/api/test_chip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,46 +137,48 @@ TEST(ApiChipTest, DeassertRiscResetOnCore) {

// This test puts a specific core into reset and then specifies a legal deassert value.
// It reads back the risc reset reg to validate that the register holds exactly that value.
TEST(ApiChipTest, SpecifyLegalDeassertRiscResetOnCore) {
std::unique_ptr<Cluster> umd_cluster = get_cluster();
// TODO issue#362
// TEST(ApiChipTest, SpecifyLegalDeassertRiscResetOnCore) {
// std::unique_ptr<Cluster> umd_cluster = get_cluster();

// Nothing to exercise when no cluster was created or it reports no chips.
if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
GTEST_SKIP() << "No chips present on the system. Skipping test.";
}
// if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
// GTEST_SKIP() << "No chips present on the system. Skipping test.";
// }

// Core under test, as chosen by the get_tensix_chip_core_coord helper.
tt_cxy_pair chip_core_coord = get_tensix_chip_core_coord(umd_cluster);
// tt_cxy_pair chip_core_coord = get_tensix_chip_core_coord(umd_cluster);

// Assert reset first, then deassert with an explicitly chosen option mask.
// NOTE(review): l1_membar is issued before the read-back; presumably it orders the reset
// writes ahead of the register read - TODO confirm.
umd_cluster->assert_risc_reset_at_core(chip_core_coord);
TensixSoftResetOptions deassert_val = ALL_TRISC_SOFT_RESET | TensixSoftResetOptions::STAGGERED_START;
umd_cluster->deassert_risc_reset_at_core(chip_core_coord, deassert_val);
umd_cluster->l1_membar(chip_core_coord.chip, "LARGE_WRITE_TLB");
// umd_cluster->assert_risc_reset_at_core(chip_core_coord);
// TensixSoftResetOptions deassert_val = ALL_TRISC_SOFT_RESET | TensixSoftResetOptions::STAGGERED_START;
// umd_cluster->deassert_risc_reset_at_core(chip_core_coord, deassert_val);
// umd_cluster->l1_membar(chip_core_coord.chip, "LARGE_WRITE_TLB");

// Read the soft reset register back; a legal value is expected to be stored unmodified.
uint32_t soft_reset_reg_addr = 0xFFB121B0;
uint32_t risc_reset_val;
umd_cluster->read_from_device(&risc_reset_val, chip_core_coord, soft_reset_reg_addr, sizeof(uint32_t), "REG_TLB");
EXPECT_EQ(static_cast<uint32_t>(deassert_val), risc_reset_val);
}
// uint32_t soft_reset_reg_addr = 0xFFB121B0;
// uint32_t risc_reset_val;
// umd_cluster->read_from_device(&risc_reset_val, chip_core_coord, soft_reset_reg_addr, sizeof(uint32_t),
// "REG_TLB"); EXPECT_EQ(static_cast<uint32_t>(deassert_val), risc_reset_val);
// }

// This test puts a specific core into reset and then specifies an illegal deassert value.
// It reads back the risc reset reg to validate that the reset reg is in a legal state.
TEST(ApiChipTest, SpecifyIllegalDeassertRiscResetOnCore) {
std::unique_ptr<Cluster> umd_cluster = get_cluster();
// TODO issue#362
// TEST(ApiChipTest, SpecifyIllegalDeassertRiscResetOnCore) {
// std::unique_ptr<Cluster> umd_cluster = get_cluster();

// Nothing to exercise when no cluster was created or it reports no chips.
if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
GTEST_SKIP() << "No chips present on the system. Skipping test.";
}
// if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
// GTEST_SKIP() << "No chips present on the system. Skipping test.";
// }

// Core under test, as chosen by the get_tensix_chip_core_coord helper.
tt_cxy_pair chip_core_coord = get_tensix_chip_core_coord(umd_cluster);
// tt_cxy_pair chip_core_coord = get_tensix_chip_core_coord(umd_cluster);

umd_cluster->assert_risc_reset_at_core(chip_core_coord);
// umd_cluster->assert_risc_reset_at_core(chip_core_coord);

// 0xDEADBEEF is deliberately not a combination of valid option bits.
// NOTE(review): l1_membar is issued before the read-back; presumably it orders the reset
// writes ahead of the register read - TODO confirm.
TensixSoftResetOptions deassert_val = static_cast<TensixSoftResetOptions>(0xDEADBEEF);
umd_cluster->deassert_risc_reset_at_core(chip_core_coord, deassert_val);
umd_cluster->l1_membar(chip_core_coord.chip, "LARGE_WRITE_TLB");
// TensixSoftResetOptions deassert_val = static_cast<TensixSoftResetOptions>(0xDEADBEEF);
// umd_cluster->deassert_risc_reset_at_core(chip_core_coord, deassert_val);
// umd_cluster->l1_membar(chip_core_coord.chip, "LARGE_WRITE_TLB");

// The register is expected to hold only the requested bits that survive masking with
// ALL_TENSIX_SOFT_RESET.
uint32_t soft_reset_reg_addr = 0xFFB121B0;
uint32_t risc_reset_val;
umd_cluster->read_from_device(&risc_reset_val, chip_core_coord, soft_reset_reg_addr, sizeof(uint32_t), "REG_TLB");
uint32_t expected_deassert_val = static_cast<uint32_t>(deassert_val & ALL_TENSIX_SOFT_RESET);
EXPECT_EQ(risc_reset_val, expected_deassert_val);
}
// uint32_t soft_reset_reg_addr = 0xFFB121B0;
// uint32_t risc_reset_val;
// umd_cluster->read_from_device(&risc_reset_val, chip_core_coord, soft_reset_reg_addr, sizeof(uint32_t),
// "REG_TLB"); uint32_t expected_deassert_val = static_cast<uint32_t>(deassert_val & ALL_TENSIX_SOFT_RESET);
// EXPECT_EQ(risc_reset_val, expected_deassert_val);
// }
240 changes: 121 additions & 119 deletions tests/wormhole/test_silicon_driver_wh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,122 +866,124 @@ TEST(SiliconDriverWH, SysmemTestWithPcie) {
* Same idea as above, but with four channels of sysmem and random addresses.
* The hardware mechanism is too slow to sweep the entire range.
*/
// Spot-checks host system memory through the PCIe core: each channel's buffer is filled with
// random bytes, then randomly chosen sizeof(uint32_t)-aligned words are read via the device
// and compared against the host-side copy.
// Identifiers that appear inside gtest assertions keep their names so failure output is stable.
TEST(SiliconDriverWH, RandomSysmemTestWithPcie) {
    const size_t channel_count = 2;  // ideally 4, but CI seems to have 2...
    auto devices = get_target_devices();

    Cluster cluster(
        test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"),
        devices,
        channel_count,
        false,  // skip driver allocs - no (don't skip)
        true,   // clean system resources - yes
        true);  // perform harvesting - yes

    set_params_for_remote_txn(cluster);
    cluster.start_device(tt_device_params{});  // no special parameters

    const chip_id_t mmio_chip = 0;
    const auto PCIE = cluster.get_soc_descriptor(mmio_chip).pcie_cores.at(0);
    const tt_cxy_pair pcie_target(mmio_chip, PCIE.x, PCIE.y);

    // PCIe core is at (x=0, y=3) on Wormhole NOC0.
    ASSERT_EQ(PCIE.x, 0);
    ASSERT_EQ(PCIE.y, 3);

    const size_t bytes_per_channel = static_cast<size_t>(1) << 30;  // one GiB of sysmem per channel
    const size_t reads_per_channel = 0x20000;                       // runs in a reasonable amount of time

    // Returns a uniformly chosen multiple of ALIGNMENT between the two bounds.
    const uint64_t ALIGNMENT = sizeof(uint32_t);
    auto pick_aligned_address = [&](uint64_t range_lo, uint64_t range_hi) -> uint64_t {
        static std::random_device seed_source;
        static std::mt19937_64 engine(seed_source());
        std::uniform_int_distribution<uint64_t> slot(range_lo / ALIGNMENT, range_hi / ALIGNMENT);
        const uint64_t chosen_slot = slot(engine);
        return chosen_slot * ALIGNMENT;
    };

    const uint64_t pcie_base = cluster.get_pcie_base_addr_from_device(mmio_chip);
    for (size_t channel = 0; channel < channel_count; ++channel) {
        auto* sysmem = static_cast<uint8_t*>(cluster.host_dma_address(0, 0, channel));
        ASSERT_NE(sysmem, nullptr);

        test_utils::fill_with_random_bytes(sysmem, bytes_per_channel);

        // Each channel owns one contiguous window of the device-visible range.
        uint64_t lo = bytes_per_channel * channel;
        uint64_t hi = lo + bytes_per_channel - 1;

        if (channel == 3) {
            // TODO: I thought everything past 0xffff'dddd was registers or
            // something, but a) I don't know what's actually there, and b)
            // the unusable range seems to be bigger than that... so
            // restricting to 0x8'f000'0000.
            hi &= ~0x0fff'ffffULL;
        }

        for (size_t remaining = reads_per_channel; remaining > 0; --remaining) {
            uint64_t address = pick_aligned_address(lo, hi);

            ASSERT_GE(address, lo) << "Address too low";
            ASSERT_LE(address, hi) << "Address too high";
            ASSERT_EQ(address % ALIGNMENT, 0) << "Address not properly aligned";

            // Device-side read first, then the matching word from the host buffer.
            uint32_t value = 0;
            cluster.read_from_device(&value, pcie_target, pcie_base + address, sizeof(uint32_t), "LARGE_READ_TLB");

            uint32_t expected = *reinterpret_cast<uint32_t*>(sysmem + (address - lo));
            ASSERT_EQ(value, expected) << fmt::format("Mismatch at address {:#x}", address);
        }
    }
}

// Reads one scratch register of the ARC reset unit three ways - through BAR0, through a TLB
// mapped onto the reset unit's NOC address, and through a different TLB / code path - and
// expects all three reads to agree.
// value0/value1/value2 keep their names because gtest prints them on failure.
TEST(SiliconDriverWH, LargeAddressTlb) {
    const size_t channel_count = 1;
    auto devices = get_target_devices();

    Cluster cluster(
        test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"),
        devices,
        channel_count,
        false,  // skip driver allocs - no (don't skip)
        true,   // clean system resources - yes
        true);  // perform harvesting - yes

    const auto arc_xy = cluster.get_soc_descriptor(0).arc_cores.at(0);
    const tt_cxy_pair arc_target(0, arc_xy.x, arc_xy.y);

    set_params_for_remote_txn(cluster);
    cluster.start_device(tt_device_params{});

    // Every core is mapped to static TLB index 0.
    auto tlb_zero_for_any_core = [](tt_xy_pair) { return 0; };
    cluster.setup_core_to_tlb_map(0, tlb_zero_for_any_core);

    // Address of the reset unit in ARC core:
    uint64_t reset_unit_noc = 0x880030000ULL;

    // Map a TLB to the reset unit in ARC core:
    cluster.configure_tlb(0, arc_target, 0, reset_unit_noc);

    // Address of the scratch register: the reset unit base plus the 0x60 scratch offset.
    uint64_t scratch_reg_noc = reset_unit_noc + 0x60;

    // Read the scratch register via BAR0:
    uint32_t value0 = cluster.bar_read32(0, 0x1ff30060);

    // Read the scratch register via the TLB:
    uint32_t value1 = 0;
    cluster.read_from_device(&value1, arc_target, scratch_reg_noc, sizeof(uint32_t), "LARGE_READ_TLB");

    // Read the scratch register via a different TLB, different code path:
    uint32_t value2 = 0;
    cluster.read_from_device(&value2, arc_target, scratch_reg_noc, sizeof(uint32_t), "REG_TLB");

    // Check that the values are the same:
    EXPECT_EQ(value1, value0);
    EXPECT_EQ(value2, value0);
}
// TODO issue#363
// TEST(SiliconDriverWH, RandomSysmemTestWithPcie) {
// const size_t num_channels = 2; // ideally 4, but CI seems to have 2...
// auto target_devices = get_target_devices();

// Cluster cluster(
// test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"),
// target_devices,
// num_channels,
// false, // skip driver allocs - no (don't skip)
// true, // clean system resources - yes
// true); // perform harvesting - yes

// set_params_for_remote_txn(cluster);
// cluster.start_device(tt_device_params{}); // no special parameters

// const chip_id_t mmio_chip_id = 0;
// const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).pcie_cores.at(0);
// const tt_cxy_pair PCIE_CORE(mmio_chip_id, PCIE.x, PCIE.y);
// const size_t ONE_GIG = 1 << 30;
// const size_t num_tests = 0x20000; // runs in a reasonable amount of time

// // PCIe core is at (x=0, y=3) on Wormhole NOC0.
// ASSERT_EQ(PCIE.x, 0);
// ASSERT_EQ(PCIE.y, 3);

// const uint64_t ALIGNMENT = sizeof(uint32_t);
// auto generate_aligned_address = [&](uint64_t lo, uint64_t hi) -> uint64_t {
// static std::random_device rd;
// static std::mt19937_64 gen(rd());
// std::uniform_int_distribution<uint64_t> dis(lo / ALIGNMENT, hi / ALIGNMENT);
// return dis(gen) * ALIGNMENT;
// };

// uint64_t base_address = cluster.get_pcie_base_addr_from_device(mmio_chip_id);
// for (size_t channel = 0; channel < num_channels; ++channel) {
// uint8_t* sysmem = (uint8_t*)cluster.host_dma_address(0, 0, channel);
// ASSERT_NE(sysmem, nullptr);

// test_utils::fill_with_random_bytes(sysmem, ONE_GIG);

// uint64_t lo = (ONE_GIG * channel);
// uint64_t hi = (lo + ONE_GIG) - 1;

// if (channel == 3) {
// // TODO: I thought everything past 0xffff'dddd was registers or
// // something, but a) I don't know what's actually there, and b)
// // the unusable range seems to be bigger than that... so
// // restricting to 0x8'f000'0000.
// hi &= ~0x0fff'ffffULL;
// }

// for (size_t i = 0; i < num_tests; ++i) {
// uint64_t address = generate_aligned_address(lo, hi);
// uint64_t noc_addr = base_address + address;
// uint64_t sysmem_address = address - lo;

// ASSERT_GE(address, lo) << "Address too low";
// ASSERT_LE(address, hi) << "Address too high";
// ASSERT_EQ(address % ALIGNMENT, 0) << "Address not properly aligned";

// uint32_t value = 0;
// cluster.read_from_device(&value, PCIE_CORE, noc_addr, sizeof(uint32_t), "LARGE_READ_TLB");

// uint32_t expected = *reinterpret_cast<uint32_t*>(&sysmem[sysmem_address]);
// ASSERT_EQ(value, expected) << fmt::format("Mismatch at address {:#x}", address);
// }
// }
// }

// TODO issue#364
// TEST(SiliconDriverWH, LargeAddressTlb) {
// const size_t num_channels = 1;
// auto target_devices = get_target_devices();

// Cluster cluster(
// test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"),
// target_devices,
// num_channels,
// false, // skip driver allocs - no (don't skip)
// true, // clean system resources - yes
// true); // perform harvesting - yes

// const auto ARC = cluster.get_soc_descriptor(0).arc_cores.at(0);
// const tt_cxy_pair ARC_CORE(0, ARC.x, ARC.y);

// set_params_for_remote_txn(cluster);
// cluster.start_device(tt_device_params{});

// auto get_static_tlb_index_callback = [](tt_xy_pair target) { return 0; };
// cluster.setup_core_to_tlb_map(0, get_static_tlb_index_callback);

// // Address of the reset unit in ARC core:
// uint64_t arc_reset_noc = 0x880030000ULL;

// // Offset to the scratch registers in the reset unit:
// uint64_t scratch_offset = 0x60;

// // Map a TLB to the reset unit in ARC core:
// cluster.configure_tlb(0, ARC_CORE, 0, arc_reset_noc);

// // Address of the scratch register in the reset unit:
// uint64_t addr = arc_reset_noc + scratch_offset;

// uint32_t value0 = 0;
// uint32_t value1 = 0;
// uint32_t value2 = 0;

// // Read the scratch register via BAR0:
// value0 = cluster.bar_read32(0, 0x1ff30060);

// // Read the scratch register via the TLB:
// cluster.read_from_device(&value1, ARC_CORE, addr, sizeof(uint32_t), "LARGE_READ_TLB");

// // Read the scratch register via a different TLB, different code path:
// cluster.read_from_device(&value2, ARC_CORE, addr, sizeof(uint32_t), "REG_TLB");

// // Check that the values are the same:
// EXPECT_EQ(value1, value0);
// EXPECT_EQ(value2, value0);
// }
Loading