diff --git a/.github/workflows/t3000-frequent-tests-impl.yaml b/.github/workflows/t3000-frequent-tests-impl.yaml
index fde2ede1652..11a2df7b146 100644
--- a/.github/workflows/t3000-frequent-tests-impl.yaml
+++ b/.github/workflows/t3000-frequent-tests-impl.yaml
@@ -21,6 +21,7 @@ jobs:
           { name: "t3k llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
           { name: "t3k n300 mesh llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
           { name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
+          { name: "t3k llama3 accuracy tests", arch: wormhole_b0, cmd: run_t3000_llama3_accuracy_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
           { name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
           # { name: "t3k llama3_70b tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich # FIXME issue #14934
           { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
diff --git a/models/demos/llama3/PERF.md b/models/demos/llama3/PERF.md
index dd060a14c1c..f0dbf00ec4b 100644
--- a/models/demos/llama3/PERF.md
+++ b/models/demos/llama3/PERF.md
@@ -12,16 +12,16 @@ This configuration uses bfp4 MLP FF1+FF3 for all models.
 |-------|--------|-----------|-----------|---------------|
 | 1b | N150 | 79 | 98 | 90.5 |
 | 1b | N300 | 81 | 98 | 101.7 |
-| 1b | T3K | 81 | 98 | 97.5 |
+| 1b | T3K | 81 | 98 | 96.8 |
 | 3b | N150 | 85 | 96 | 49.0 |
 | 3b | N300 | 88 | 97 | 56.9 |
 | 3b | T3K | 88 | 97 | 54.5 |
 | 8b | N150 | 86 | 98 | 28.4 |
 | 8b | N300 | 84 | 98 | 38.6 |
-| 8b | T3K | 84 | 98 | 52.6 |
+| 8b | T3K | 84 | 97 | 52.6 |
 | 11b | N300 | 86 | 97 | 38.6 |
 | 11b | T3K | 84 | 98 | 52.6 |
-| 70b | T3K | 95 | 100 | 14.3 |
+| 70b | T3K | 94 | 100 | 14.3 |

 ## LlamaOptimizations.accuracy

@@ -40,4 +40,4 @@ This configuration uses bfp4 MLP FF1+FF3 only for the 3.1-70B model.
 | 8b | T3K | 88 | 97 | 49.9 |
 | 11b | N300 | 90 | 97 | 33.8 |
 | 11b | T3K | 88 | 97 | 52.6 |
-| 70b | T3K | 95 | 100 | 14.5 |
+| 70b | T3K | 94 | 100 | 14.5 |
diff --git a/tests/scripts/single_card/run_single_card_demo_tests.sh b/tests/scripts/single_card/run_single_card_demo_tests.sh
index 053f5b966af..11a4e96a895 100755
--- a/tests/scripts/single_card/run_single_card_demo_tests.sh
+++ b/tests/scripts/single_card/run_single_card_demo_tests.sh
@@ -15,6 +15,20 @@ run_common_func_tests() {

   # Qwen7B
   QWEN_DIR=/mnt/MLPerf/tt_dnn-models/qwen/Qwen2-7B-Instruct WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml FAKE_DEVICE=N150 pytest -n auto models/demos/qwen/demo/demo.py -k instruct --timeout 420; fail+=$?

+  # Llama3 Accuracy tests
+  # Llama3.2-1B
+  llama1b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/
+  # Llama3.2-3B
+  llama3b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/
+  # Llama3.1-8B (11B weights are the same)
+  llama8b=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-8B-Instruct/
+
+  # Run Llama3 accuracy tests for 1B, 3B, 8B weights
+  for llama_dir in "$llama1b" "$llama3b" "$llama8b"; do
+    LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_accuracy.py -k perf --timeout 420; fail+=$?
+    echo "LOG_METAL: Llama3 accuracy tests for $llama_dir completed"
+  done
+
   #VGG11/VGG16
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/vgg/demo/demo.py --timeout 600; fail+=$?
diff --git a/tests/scripts/t3000/run_t3000_frequent_tests.sh b/tests/scripts/t3000/run_t3000_frequent_tests.sh
index 0058a3fc9e3..3ade2f43355 100755
--- a/tests/scripts/t3000/run_t3000_frequent_tests.sh
+++ b/tests/scripts/t3000/run_t3000_frequent_tests.sh
@@ -63,7 +63,7 @@ run_t3000_llama3_tests() {
   # Run test model for llama3 - 1B, 3B, 8B and 11B weights
   for llama_dir in "$llama1b" "$llama3b" "$llama8b" "$llama11b"; do
     LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k full ; fail+=$?
-    # LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_model_prefill.py ; fail+=$? # FIXME Issue #14843
+    LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_model_prefill.py ; fail+=$?
     echo "LOG_METAL: Llama3 tests for $llama_dir completed"
   done

@@ -96,6 +96,40 @@ run_t3000_llama3_70b_tests() {
   fi
 }

+run_t3000_llama3_accuracy_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_llama3_accuracy_tests"
+
+  wh_arch_yaml=wormhole_b0_80_arch_eth_dispatch.yaml
+  # Llama3.2-1B
+  llama1b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/
+  # Llama3.2-3B
+  llama3b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/
+  # Llama3.1-8B
+  llama8b=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-8B-Instruct/
+  # Llama3.2-11B
+  llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/
+  # Llama3.1-70B
+  llama70b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.1-70B-Instruct/
+
+  # Run test accuracy llama3 - 1B, 3B, 8B, 11B and 70B weights
+  for llama_dir in "$llama1b" "$llama3b" "$llama8b" "$llama11b" "$llama70b"; do
+    LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_accuracy.py -k perf ; fail+=$?
+    echo "LOG_METAL: Llama3 accuracy tests for $llama_dir completed"
+  done
+
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_llama3_accuracy_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
 run_t3000_llama3.2-11b-vision_freq_tests() {
   # Record the start time
   fail=0
@@ -277,6 +311,9 @@ run_t3000_tests() {

   # Run llama3-70b tests
   run_t3000_llama3_70b_tests

+  # Run llama3 accuracy tests
+  run_t3000_llama3_accuracy_tests
+
   # Run Llama3.2-11B Vision tests
   run_t3000_llama3.2-11b-vision_freq_tests