diff --git a/.github/workflows/t3000-perplexity-tests.yaml b/.github/workflows/t3000-perplexity-tests.yaml
index 83740e75758..c7d8f2d16ea 100644
--- a/.github/workflows/t3000-perplexity-tests.yaml
+++ b/.github/workflows/t3000-perplexity-tests.yaml
@@ -11,7 +11,7 @@ jobs:
     with:
       arch: '["wormhole_b0"]'
     secrets: inherit
-  t3000-model-perf-tests:
+  t3000-model-accuracy-perplexity-tests:
     needs: build-artifact
     secrets: inherit
     uses: ./.github/workflows/t3000-perplexity-tests-impl.yaml
diff --git a/models/demos/llama3/README.md b/models/demos/llama3/README.md
index 6473c2dbb71..0a5f5ab1430 100644
--- a/models/demos/llama3/README.md
+++ b/models/demos/llama3/README.md
@@ -27,11 +27,11 @@ The downloaded directories include weight files (e.g. `consolidated.00.pth`), th
 The repacked output directory can be the same as the checkpoint directory, since the new files will have different names.
 If providing a different path, please make sure that you keep the string `3.1-70B` in the new path name, since the Llama3 codebase relies on the weights directory name to identify the correct model.
 
-Note: Use the value of `10` for `chunk_size`.
+Note: Use the default value of `10` for `chunk_size`.
 
 ```
 # This concatenates the sharded checkpoints and makes it easier for us to load.
-python models/demos/llama3/scripts/repack_weights_70b.py <path_to_checkpoint_dir> <repacked_output_dir> <chunk_size>
+python models/demos/llama3/scripts/repack_weights_70b.py <path_to_checkpoint_dir> <repacked_output_dir>
 ```
 
 If providing a different output directory, please copy the `params.json` and the `tokenizer.model` files to the new directory.
diff --git a/tests/scripts/t3000/run_t3000_demo_tests.sh b/tests/scripts/t3000/run_t3000_demo_tests.sh
index fb3b54967fa..349d1bb7964 100755
--- a/tests/scripts/t3000/run_t3000_demo_tests.sh
+++ b/tests/scripts/t3000/run_t3000_demo_tests.sh
@@ -147,6 +147,9 @@ run_t3000_tests() {
   # Run llama3 smaller tests (1B, 3B, 8B, 11B)
   run_t3000_llama3_tests
 
+  # Run llama3 vision tests
+  run_t3000_llama3_vision_tests
+
   # Run llama3_70b tests
   run_t3000_llama3_70b_tests
 
diff --git a/tests/scripts/t3000/run_t3000_frequent_tests.sh b/tests/scripts/t3000/run_t3000_frequent_tests.sh
index b18cde1741e..6e97502f88c 100755
--- a/tests/scripts/t3000/run_t3000_frequent_tests.sh
+++ b/tests/scripts/t3000/run_t3000_frequent_tests.sh
@@ -51,16 +51,16 @@ run_t3000_llama3_tests() {
   echo "LOG_METAL: Running run_t3000_llama3_tests"
 
   wh_arch_yaml=wormhole_b0_80_arch_eth_dispatch.yaml
-  # Llama3.1-8B
-  llama8b=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-8B-Instruct/
   # Llama3.2-1B
   llama1b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-1B-Instruct/
   # Llama3.2-3B
   llama3b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-3B-Instruct/
+  # Llama3.1-8B
+  llama8b=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-8B-Instruct/
   # Llama3.2-11B
   llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/
 
-  # Run all Llama3 tests for 8B, 1B, and 3B weights
+  # Run all Llama3 model tests for the 1B, 3B, 8B and 11B weights
   for llama_dir in "$llama1b" "$llama3b" "$llama8b" "$llama11b"; do
     LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k full ; fail+=$?
     LLAMA_DIR=$llama_dir WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/test_llama_model_prefill.py ; fail+=$?
@@ -124,6 +124,36 @@ run_t3000_llama3.2-11b-vision_freq_tests() {
   fi
 }
 
+run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests"
+
+  wh_arch_yaml=wormhole_b0_80_arch_eth_dispatch.yaml
+  # Llama3.2-11B
+  llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/
+  # Use FAKE_DEVICE env variable to run on an N300 mesh
+  fake_device=N300
+
+  # Install Vision-specific packages
+  pip install -r models/demos/llama3/requirements.txt
+
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_image_transformer.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_vision_encoder.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_cross_attention_transformer_text.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_cross_attention_transformer_vision.py ; fail+=$?
+
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
 run_t3000_mixtral_tests() {
   # Record the start time
   fail=0
@@ -249,6 +279,9 @@ run_t3000_tests() {
   # Run Llama3.2-11B Vision tests
   run_t3000_llama3.2-11b-vision_freq_tests
 
+  # Run Llama3.2-11B Vision tests on spoofed N300
+  run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests
+
   # Run mixtral tests
   run_t3000_mixtral_tests
 
diff --git a/tests/scripts/t3000/run_t3000_perplexity_tests.sh b/tests/scripts/t3000/run_t3000_perplexity_tests.sh
index 05124d41f38..653b9d163a7 100755
--- a/tests/scripts/t3000/run_t3000_perplexity_tests.sh
+++ b/tests/scripts/t3000/run_t3000_perplexity_tests.sh
@@ -79,7 +79,7 @@ run_t3000_mixtral8x7b_perplexity_tests() {
 
 run_t3000_llama3_perplexity_tests_single_card() {
   # Split long set of tests into two groups
-  # This one runs all the N150 and N300 tests
+  # This one runs all the N150 and N300 tests spoofed on a T3k
   fail=0
   start_time=$(date +%s)
 
diff --git a/tests/scripts/t3000/run_t3000_unit_tests.sh b/tests/scripts/t3000/run_t3000_unit_tests.sh
index 64c9a90eb29..9c790cc281c 100755
--- a/tests/scripts/t3000/run_t3000_unit_tests.sh
+++ b/tests/scripts/t3000/run_t3000_unit_tests.sh
@@ -213,6 +213,41 @@ run_t3000_llama3.2-11b-vision_unit_tests() {
   fi
 }
 
+run_t3000_spoof_n300_llama3.2-11b-vision_unit_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_spoof_n300_llama3.2-11b-vision_unit_tests"
+
+  wh_arch_yaml=wormhole_b0_80_arch_eth_dispatch.yaml
+  # Llama3.2-11B
+  llama11b=/mnt/MLPerf/tt_dnn-models/llama/Llama3.2-11B-Vision-Instruct/
+  # Use FAKE_DEVICE env variable to run on an N300 mesh
+  fake_device=N300
+
+  # Install Vision-specific packages
+  pip install -r models/demos/llama3/requirements.txt
+
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_image_mlp.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_image_attention.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_image_block.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_cross_attention.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_cross_block.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_conv2d_patch.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_class_embedding.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_tile_position_embedding.py ; fail+=$?
+  FAKE_DEVICE=$fake_device LLAMA_DIR=$llama11b WH_ARCH_YAML=$wh_arch_yaml pytest -n auto models/demos/llama3/tests/multimodal/test_llama_positional_embedding.py ; fail+=$?
+
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_spoof_n300_llama3.2-11b-vision_unit_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
 run_t3000_mixtral_tests() {
   # Record the start time
   fail=0
@@ -302,6 +337,9 @@ run_t3000_tests() {
   # Run llama3.2-11B-vision tests
   run_t3000_llama3.2-11b-vision_unit_tests
 
+  # Run llama3.2-11B-vision tests on spoofed N300 mesh
+  run_t3000_spoof_n300_llama3.2-11b-vision_unit_tests
+
   # Run mixtral tests
   run_t3000_mixtral_tests
 
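Taken together, these hunks let the multimodal Llama3.2-11B unit and frequent tests run on a T3000 while spoofing a two-chip N300 mesh via the `FAKE_DEVICE` environment variable. A minimal sketch of reproducing one such run locally, assuming a T3000 machine with the tt-metal repo checked out; the weights path below is illustrative and should point at your own Llama3.2-11B-Vision-Instruct checkpoint:

```
# Sketch: run one spoofed-N300 multimodal unit test on a T3000, mirroring the
# env-var conventions the CI scripts above use.
export FAKE_DEVICE=N300                                    # spoof an N300 (2-chip) mesh
export LLAMA_DIR=/path/to/Llama3.2-11B-Vision-Instruct/    # illustrative weights path
export WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml  # wormhole dispatch config

# Vision-specific Python packages, as installed by the CI scripts
pip install -r models/demos/llama3/requirements.txt

pytest -n auto models/demos/llama3/tests/multimodal/test_llama_image_attention.py
```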