From 3498cdcf392e28fa58bda73a09a54fcc8b702016 Mon Sep 17 00:00:00 2001
From: Vincent Tang
Date: Tue, 25 Jun 2024 23:27:08 +0000
Subject: [PATCH] #0: custom timeouts for longer tests (revert this before)

---
 models/demos/t3000/mixtral8x7b/demo/demo.py        | 1 -
 .../llama2_70b/tests/test_llama_perf_decode.py     | 1 -
 tests/scripts/run_performance.sh                   | 8 ++++----
 tests/scripts/t3000/run_t3000_demo_tests.sh        | 8 ++++----
 tests/scripts/t3000/run_t3000_frequent_tests.sh    | 8 ++++----
 tests/scripts/t3000/run_t3000_model_perf_tests.sh  | 4 ++--
 6 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/models/demos/t3000/mixtral8x7b/demo/demo.py b/models/demos/t3000/mixtral8x7b/demo/demo.py
index e52ee174fc8..8015cf8a733 100644
--- a/models/demos/t3000/mixtral8x7b/demo/demo.py
+++ b/models/demos/t3000/mixtral8x7b/demo/demo.py
@@ -300,7 +300,6 @@ def run_mixtral_demo(user_input, batch_size, device_mesh, instruct_mode):
         logger.info("[User {}] {}".format(user, "".join(tokenizer.decode(all_outputs[user]))))
 
 
-@pytest.mark.timeout(10000)
 @pytest.mark.parametrize(
     "input_prompts, instruct_weights",
     [
diff --git a/models/experimental/llama2_70b/tests/test_llama_perf_decode.py b/models/experimental/llama2_70b/tests/test_llama_perf_decode.py
index e7b52f85c39..a3ec86c6c2a 100644
--- a/models/experimental/llama2_70b/tests/test_llama_perf_decode.py
+++ b/models/experimental/llama2_70b/tests/test_llama_perf_decode.py
@@ -238,7 +238,6 @@ def run_test_LlamaModel_end_to_end(
 
 
 @skip_for_grayskull("Requires eth connected devices to run")
-@pytest.mark.timeout(240000)
 @pytest.mark.model_perf_t3000
 @pytest.mark.parametrize(
     "generation_length, expected_compile_time, expected_inference_time",
diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh
index dc9e7c35ae7..a16d36789a1 100755
--- a/tests/scripts/run_performance.sh
+++ b/tests/scripts/run_performance.sh
@@ -36,10 +36,10 @@ run_perf_models_llm_javelin() {
     env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m $test_marker
 
     if [ "$tt_arch" == "wormhole_b0" ]; then
-        env pytest -n auto models/demos/mamba/tests -m $test_marker
+        env pytest -n auto models/demos/mamba/tests -m $test_marker --timeout=360
     fi
 
-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/wormhole/mistral7b/tests -m $test_marker
+    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/wormhole/mistral7b/tests -m $test_marker --timeout=360
 
     ## Merge all the generated reports
     env python models/perf/merge_perf_results.py
@@ -50,7 +50,7 @@ run_perf_models_cnn_javelin() {
     local test_marker=$2
 
     # Run tests
-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/device_perf_tests/stable_diffusion -m $test_marker
+    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/device_perf_tests/stable_diffusion -m $test_marker --timeout=480
     #env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_unet/tests -m $test_marker
 
     ## Merge all the generated reports
@@ -82,7 +82,7 @@ run_device_perf_models() {
     fi
 
     if [ "$tt_arch" == "wormhole_b0" ]; then
-        env pytest models/demos/mamba/tests -m $test_marker
+        env pytest models/demos/mamba/tests -m $test_marker --timeout=360
 
         env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/metal_BERT_large_11/tests -m $test_marker
         #env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_unet/tests -m $test_marker
diff --git a/tests/scripts/t3000/run_t3000_demo_tests.sh b/tests/scripts/t3000/run_t3000_demo_tests.sh
index d0f984dcfcb..d838e58aa8a 100755
--- a/tests/scripts/t3000/run_t3000_demo_tests.sh
+++ b/tests/scripts/t3000/run_t3000_demo_tests.sh
@@ -10,7 +10,7 @@ run_t3000_falcon40b_tests() {
   echo "LOG_METAL: Running run_t3000_falcon40b_tests"
 
   # Falcon40B prefill 60 layer end to end with 10 loops; we need 8x8 grid size
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py --timeout=720; fail+=$?
 
   # Falcon40B end to end demo (prefill + decode)
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_loops.py ; fail+=$?
@@ -38,8 +38,8 @@ run_t3000_falcon7b_tests(){
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-default_mode_1024_greedy_verify] ; fail+=$?
 
   # Falcon7B perplexity test (prefill and decode)
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-prefill_seq1024_dram] ; fail+=$?
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-decode_1024_l1_sharded] ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-prefill_seq1024_dram] --timeout=720 ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-decode_1024_l1_sharded] --timeout=720 ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -58,7 +58,7 @@ run_t3000_mixtral_tests() {
   echo "LOG_METAL: Running run_t3000_mixtral8x7b_tests"
 
   # mixtral8x7b 8 chip demo test - 100 token generation with general weights (env flags set inside the test)
-  pytest -n auto models/demos/t3000/mixtral8x7b/demo/demo.py::test_mixtral8x7b_demo[wormhole_b0-True-general_weights] ; fail+=$?
+  pytest -n auto models/demos/t3000/mixtral8x7b/demo/demo.py::test_mixtral8x7b_demo[wormhole_b0-True-general_weights] --timeout=720; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
diff --git a/tests/scripts/t3000/run_t3000_frequent_tests.sh b/tests/scripts/t3000/run_t3000_frequent_tests.sh
index 203ffd436d6..6bed69acda9 100755
--- a/tests/scripts/t3000/run_t3000_frequent_tests.sh
+++ b/tests/scripts/t3000/run_t3000_frequent_tests.sh
@@ -49,7 +49,7 @@ run_t3000_llama2_70b_experimental_tests() {
   # WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/llama2_70b/tests/test_llama_mlp_t3000.py
   # WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/llama2_70b/tests/test_llama_attention_t3000.py
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_decoder_t3000.py ; fail+=$?
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_model_t3000.py ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_model_t3000.py --timeout=900; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -108,9 +108,9 @@ run_t3000_falcon40b_tests() {
   echo "LOG_METAL: Running run_t3000_falcon40b_tests"
 
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_mlp.py ; fail+=$?
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_attention.py ; fail+=$?
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_decoder.py ; fail+=$?
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_causallm.py ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_attention.py --timeout=480; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_decoder.py --timeout=480; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_causallm.py --timeout=600; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
index 4176d15afb5..937ce20c0e8 100755
--- a/tests/scripts/t3000/run_t3000_model_perf_tests.sh
+++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
@@ -36,7 +36,7 @@ run_t3000_llama2_70b_tests() {
 
   echo "LOG_METAL: Running run_t3000_llama2_70b_tests"
 
-  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_perf_decode.py -m "model_perf_t3000" ; fail+=$?
+  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_perf_decode.py -m "model_perf_t3000" --timeout=600 ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -50,7 +50,7 @@ run_t3000_falcon40b_tests() {
 
   echo "LOG_METAL: Running run_t3000_falcon40b_tests"
 
-  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_perf_falcon.py -m "model_perf_t3000" ; fail+=$?
+  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_perf_falcon.py -m "model_perf_t3000" --timeout=600 ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
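
Note for reviewers: the patch trades per-test @pytest.mark.timeout(...) markers for --timeout values passed on the pytest command line; both come from the pytest-timeout plugin, and a marker, when present, overrides the global flag. The sketch below is illustrative only and not part of the patch (the test names are hypothetical); it assumes pytest and pytest-timeout are installed.

import time

import pytest


# Per-test limit via the pytest-timeout marker, the style removed above in
# demo.py and test_llama_perf_decode.py; it overrides any global --timeout.
@pytest.mark.timeout(720)
def test_long_model_run():
    time.sleep(1)  # stand-in for a long model run


# Without a marker, the limit comes from the command line instead, e.g.:
#   pytest test_example.py --timeout=720
def test_uses_global_timeout():
    time.sleep(1)  # interrupted by pytest-timeout if it exceeds --timeout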