Skip to content

Commit

Permalink
#9206: try topk then falcon with dram states cleared in between
Browse files (browse the repository at this point in the history)
  • Loading branch information
sjameelTT committed Jun 10, 2024
1 parent cbb7fb8 commit ee24da7
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 23 deletions.
6 changes: 3 additions & 3 deletions tests/scripts/t3000/run_t3000_demo_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ run_t3000_mixtral_tests() {
}

# Runs the T3000 demo test groups defined in this script, one after another,
# by calling each group's run_* function in the order listed below.
# NOTE(review): this listing comes from a rendered diff (3 additions / 3 deletions)
# in which removed and added lines are shown together, so the "Run mixtral tests"
# stanza appears both first and last. Presumably only one of the two copies exists
# in the actual file -- confirm which position is current before relying on the order.
run_t3000_tests() {
# Run mixtral tests
run_t3000_mixtral_tests

# Run falcon40b tests
run_t3000_falcon40b_tests

# Run falcon7b tests
run_t3000_falcon7b_tests

# Run mixtral tests
run_t3000_mixtral_tests
}

main() {
Expand Down
20 changes: 11 additions & 9 deletions tests/scripts/t3000/run_t3000_frequent_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ run_t3000_mixtral_tests() {
echo "LOG_METAL: Running run_t3000_mixtral_tests"

# mixtral8x7b 8 chip decode model test (env flags set inside the test)
pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-10-1-pcc]
pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_topk.py::test_topk[1-1-32-64-32-BFLOAT8_B]

# Record the end time
end_time=$(date +%s)
Expand Down Expand Up @@ -88,10 +88,10 @@ run_t3000_falcon40b_tests() {

echo "LOG_METAL: Running run_t3000_falcon40b_tests"

WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_mlp.py
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_attention.py
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_decoder.py
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_causallm.py
# WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_mlp.py
# WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_attention.py
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_decoder.py::test_FalconDecoder_inference[BFLOAT8_B-SHARDED-falcon_40b-layer_0-decode_batch32-8chips]
# WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_causallm.py

# Record the end time
end_time=$(date +%s)
Expand All @@ -107,16 +107,18 @@ run_t3000_tests() {
#run_t3000_tteager_tests

# Run llama2-70b experimental tests
run_t3000_llama2_70b_experimental_tests
#run_t3000_llama2_70b_experimental_tests

# Run mixtral tests
run_t3000_mixtral_tests

# Run falcon40b tests
run_t3000_falcon40b_tests

# Run llama2-70b tests
run_t3000_llama2_70b_tests
# run_t3000_llama2_70b_tests


# Run mixtral tests
run_t3000_mixtral_tests

}

Expand Down
10 changes: 5 additions & 5 deletions tests/tt_eager/python_api_testing/unit_testing/misc/test_topk.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ def run_topk_test(N, C, H, W, k, dtype, device):
# so we will use pcc for the values and not the indices
# to make sure the indices are correct, we gather the relevant values from the original torch tensor and test to see if they are similar
# rounding may also cause more ties than expected
ttl_torch_gather_from_device_indices = torch.gather(input, -1, ttl_torch_topk_indices.to(torch.int64))
# ttl_torch_gather_from_device_indices = torch.gather(input, -1, ttl_torch_topk_indices.to(torch.int64))
val_is_passing, val_pcc = comp_pcc(pyt_topk_values, ttl_torch_topk_values, pcc_values)
ind_is_passing, ind_pcc = comp_pcc(pyt_topk_values, ttl_torch_gather_from_device_indices, pcc_index)
# ind_is_passing, ind_pcc = comp_pcc(pyt_topk_values, ttl_torch_gather_from_device_indices, pcc_index)

logger.debug(f"Values pcc = {val_pcc}")
logger.debug(f"Indices pcc = {ind_pcc}")
# logger.debug(f"Indices pcc = {ind_pcc}")

assert val_is_passing
assert ind_is_passing
# assert val_is_passing
# assert ind_is_passing


@skip_for_grayskull()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,18 @@ void kernel_main() {
// topk values
for (uint32_t i = 0; i < Kt; ++i) {
cb_wait_front(values_cb_index, onetile);
uint32_t l1_read_addr = get_read_ptr(values_cb_index);
noc_async_write_tile(j*1 + i, interleaved_accessor0, l1_read_addr);
noc_async_write_barrier();
// uint32_t l1_read_addr = get_read_ptr(values_cb_index);
// noc_async_write_tile(j*Kt + i, interleaved_accessor0, l1_read_addr);
// noc_async_write_barrier();
cb_pop_front(values_cb_index, onetile);
}

// topk indices
for (uint32_t i = 0; i < Kt; ++i) {
cb_wait_front(output_ind_cb_index, onetile);
uint32_t l1_read_addr = get_read_ptr(output_ind_cb_index);
noc_async_write_tile(j*1 + i, interleaved_accessor1, l1_read_addr);
noc_async_write_barrier();
// uint32_t l1_read_addr = get_read_ptr(output_ind_cb_index);
// noc_async_write_tile(j*Kt + i, interleaved_accessor1, l1_read_addr);
// noc_async_write_barrier();
cb_pop_front(output_ind_cb_index, onetile);
}
}
Expand Down
11 changes: 11 additions & 0 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,17 @@ void Device::clear_l1_state() {
detail::WriteToDeviceL1(this, logical_core, start_address, zero_vec);
}
}
// constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
// constexpr static std::uint32_t DRAM_ALIGNMENT = 32;
// constexpr static std::uint32_t DRAM_BARRIER_SIZE = ((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;
// constexpr static std::uint32_t DRAM_UNRESERVED_BASE = DRAM_BARRIER_BASE + DRAM_BARRIER_SIZE; // Start of unreserved space
// std::vector<uint32_t> dram_vec((this->dram_size_per_channel() - DRAM_UNRESERVED_BASE)/sizeof(uint32_t), 0xffffffff);
// std::cout<<"DRAM size per channel: "<<(this->dram_size_per_channel())<<std::endl;
// std::cout <<"DRAM BASE " << DRAM_UNRESERVED_BASE << std::endl;
// for (uint32_t x = 0; x < num_dram_channels() ; ++x) {
// detail::WriteToDeviceDRAMChannel(this, x, DRAM_UNRESERVED_BASE, dram_vec);
// std::cout<<"DRAM channel "<<x<<" cleared"<<std::endl;
// }

for (const auto &eth_core : this->get_inactive_ethernet_cores()) {
CoreCoord physical_core = this->ethernet_core_from_logical_core(eth_core);
Expand Down

0 comments on commit ee24da7

Please sign in to comment.