Skip to content

Commit

Permalink
[Llama3] Add test-accuracy to CI (#15778)
Browse files Browse the repository at this point in the history
  • Loading branch information
mtairum authored Dec 10, 2024
1 parent a275320 commit 4bcc79b
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/t3000-frequent-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
{ name: "t3k llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k n300 mesh llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k llama3 accuracy tests", arch: wormhole_b0, cmd: run_t3000_llama3_accuracy_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
# { name: "t3k llama3_70b tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich # FIXME issue #14934
{ name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
Expand Down
8 changes: 4 additions & 4 deletions models/demos/llama3/PERF.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ This configuration uses bfp4 MLP FF1+FF3 for all models.
|-------|--------|-----------|-----------|---------------|
| 1b | N150 | 79 | 98 | 90.5 |
| 1b | N300 | 81 | 98 | 101.7 |
| 1b | T3K | 81 | 98 | 97.5 |
| 1b | T3K | 81 | 98 | 96.8 |
| 3b | N150 | 85 | 96 | 49.0 |
| 3b | N300 | 88 | 97 | 56.9 |
| 3b | T3K | 88 | 97 | 54.5 |
| 8b | N150 | 86 | 98 | 28.4 |
| 8b | N300 | 84 | 98 | 38.6 |
| 8b | T3K | 84 | 98 | 52.6 |
| 8b | T3K | 84 | 97 | 52.6 |
| 11b | N300 | 86 | 97 | 38.6 |
| 11b | T3K | 84 | 98 | 52.6 |
| 70b | T3K | 95 | 100 | 14.3 |
| 70b | T3K | 94 | 100 | 14.3 |

## LlamaOptimizations.accuracy

Expand All @@ -40,4 +40,4 @@ This configuration uses bfp4 MLP FF1+FF3 only for the 3.1-70B model.
| 8b | T3K | 88 | 97 | 49.9 |
| 11b | N300 | 90 | 97 | 33.8 |
| 11b | T3K | 88 | 97 | 52.6 |
| 70b | T3K | 95 | 100 | 14.5 |
| 70b | T3K | 94 | 100 | 14.5 |
14 changes: 14 additions & 0 deletions tests/scripts/single_card/run_single_card_demo_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,20 @@ run_common_func_tests() {
# Qwen7B
QWEN_DIR=/mnt/MLPerf/tt_dnn-models/qwen/Qwen2-7B-Instruct WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml FAKE_DEVICE=N150 pytest -n auto models/demos/qwen/demo/demo.py -k instruct --timeout 420; fail+=$?

# Llama3 Accuracy tests
# Llama3.2-1B
llama1b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/
# Llama3.2-3B
llama3b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/
# Llama3.1-8B (11B weights are the same)
llama8b=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-8B-Instruct/

# Run Llama3 accuracy tests for 1B, 3B, 8B weights
for llama_dir in "$llama1b" "$llama3b" "$llama8b"; do
LLAMA_DIR=$llama_dir WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_accuracy.py -k perf --timeout 420; fail+=$?
echo "LOG_METAL: Llama3 accuracy tests for $llama_dir completed"
done

#VGG11/VGG16
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/vgg/demo/demo.py --timeout 600; fail+=$?

Expand Down
39 changes: 38 additions & 1 deletion tests/scripts/t3000/run_t3000_frequent_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ run_t3000_llama3_tests() {
# Run test model for llama3 - 1B, 3B, 8B and 11B weights
for llama_dir in "$llama1b" "$llama3b" "$llama8b" "$llama11b"; do
LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k full ; fail+=$?
# LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_model_prefill.py ; fail+=$? # FIXME Issue #14843
LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_model_prefill.py ; fail+=$?
echo "LOG_METAL: Llama3 tests for $llama_dir completed"
done

Expand Down Expand Up @@ -96,6 +96,40 @@ run_t3000_llama3_70b_tests() {
fi
}

run_t3000_llama3_accuracy_tests() {
  # Run the Llama3 accuracy suite (test_llama_accuracy.py -k perf) against
  # every supported weight checkpoint on a T3000 machine, logging the total
  # wall-clock time. Each pytest exit status is appended to $fail, and the
  # function exits 1 if any run failed (matching the file's other runners).
  fail=0
  start_time=$(date +%s)

  echo "LOG_METAL: Running run_t3000_llama3_accuracy_tests"

  wh_arch_yaml=wormhole_b0_80_arch_eth_dispatch.yaml

  # Weight checkpoint directories under the shared MLPerf model store.
  llama1b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/    # Llama3.2-1B
  llama3b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/    # Llama3.2-3B
  llama8b=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-8B-Instruct/        # Llama3.1-8B
  llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/     # Llama3.2-11B
  llama70b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.1-70B-Instruct/  # Llama3.1-70B

  # Accuracy runs for 1B, 3B, 8B, 11B and 70B weights, in that order.
  accuracy_weight_dirs=("$llama1b" "$llama3b" "$llama8b" "$llama11b" "$llama70b")

  for llama_dir in "${accuracy_weight_dirs[@]}"; do
    LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_accuracy.py -k perf ; fail+=$?
    echo "LOG_METAL: Llama3 accuracy tests for $llama_dir completed"
  done

  # Report elapsed time for CI log scraping.
  duration=$(( $(date +%s) - start_time ))
  echo "LOG_METAL: run_t3000_llama3_accuracy_tests $duration seconds to complete"
  if [[ $fail -ne 0 ]]; then
    exit 1
  fi
}

run_t3000_llama3.2-11b-vision_freq_tests() {
# Record the start time
fail=0
Expand Down Expand Up @@ -277,6 +311,9 @@ run_t3000_tests() {
# Run llama3-70b tests
run_t3000_llama3_70b_tests

# Run llama3 accuracy tests
run_t3000_llama3_accuracy_tests

# Run Llama3.2-11B Vision tests
run_t3000_llama3.2-11b-vision_freq_tests

Expand Down

0 comments on commit 4bcc79b

Please sign in to comment.