Skip to content

Commit

Permalink
#13368: Add llama3-70b to CI tests. Revamped CI perplexity test (TOD…
Browse files Browse the repository at this point in the history
…O: add llama3 to topk/perplexity)
  • Loading branch information
mtairum committed Oct 25, 2024
1 parent 0bf386b commit 3e70b34
Show file tree
Hide file tree
Showing 12 changed files with 220 additions and 61 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/t3000-demo-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ jobs:
matrix:
test-group: [
{ name: "t3k_falcon40b_tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 50, owner_id: U053W15B6JF}, #Djordje Ivanovic
{ name: "t3k_llama3_70b_tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k_llama3_tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 30, owner_id: U03PUAKE719}, # Miguel Tairum
{ name: "t3k_llama3_70b_tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k_falcon7b_tests", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 90, owner_id: U05RWH3QUPM}, #Salar Hosseini
{ name: "t3k_mixtral_tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 50, owner_id: U03PUAKE719}, # Miguel Tairum
]
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/t3000-frequent-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ jobs:
{ name: "t3k ethernet tests", arch: wormhole_b0, cmd: run_t3000_ethernet_tests, timeout: 60, owner_id: ULMEPM2MA}, #Sean Nijjar
{ name: "t3k trace stress tests", arch: wormhole_b0, cmd: run_t3000_trace_stress_tests, timeout: 120, owner_id: U03NG0A5ND7}, #Aditya Saigal
{ name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 120, owner_id: U04S2UV6L8N}, #Sofija Jovic
{ name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k llama3_70b tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k resnet tests", arch: wormhole_b0, cmd: run_t3000_resnet_tests, timeout: 30, owner_id: U013121KDH9}, #Austin Ho
]
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/t3000-model-perf-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ jobs:
test-group: [
{ name: "t3k LLM falcon7b model perf tests", model: "falcon7b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 75, owner_id: U05RWH3QUPM}, # Salar Hosseini
{ name: "t3k LLM mixtral model perf tests", model: "mixtral", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 75, owner_id: U03PUAKE719}, # Miguel Tairum
{ name: "t3k LLM llama2 model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 75, owner_id: U03FJB5TM5Y}, # Colman Glagovich
{ name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 75, owner_id: U03PUAKE719}, # Miguel Tairum
{ name: "t3k LLM llama2-70B model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 75, owner_id: U03FJB5TM5Y}, # Colman Glagovich
{ name: "t3k LLM llama3-70B model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 60, owner_id: U03FJB5TM5Y}, # Colman Glagovich
{ name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, # Miguel Tairum
{ name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic
{ name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho
#{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run?
Expand Down
52 changes: 52 additions & 0 deletions .github/workflows/t3000-perplexity-tests-impl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Reusable workflow (it is only triggered through `workflow_call`) that runs the
# T3000 perplexity/accuracy test groups as one matrix job per model.
name: "[internal] T3000 perplexity tests impl"

on:
workflow_call:

jobs:

t3000-accuracy-perplexity-tests:
strategy:
# Keep the remaining matrix jobs running when one model's job fails.
fail-fast: false
matrix:
# Each entry provides: the job name, the target arch, the shell function to
# run (`cmd`, looked up in run_t3000_perplexity_tests.sh), the timeout of the
# test step in minutes, and the owner id forwarded to the Slack report.
# NOTE(review): the llama 70B entry allows 30 minutes, while its runner
# function passes `--timeout=7200` (seconds) to pytest — confirm the
# 30-minute limit is intended.
test-group: [
{ name: "t3k_falcon7b_tests", arch: wormhole_b0, cmd: run_t3000_falcon7b_perplexity_tests, timeout: 480, owner_id: U05RWH3QUPM}, #Salar Hosseini
{ name: "t3k_falcon40b_tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_perplexity_tests, timeout: 49, owner_id: U05RWH3QUPM}, #Salar Hosseini
{ name: "t3k_llama_70b_tests", arch: wormhole_b0, cmd: run_t3000_llama70b_perplexity_tests, timeout: 30, owner_id: U05RWH3QUPM}, #Salar Hosseini
{ name: "t3k_mixtral_tests", arch: wormhole_b0, cmd: run_t3000_mixtral8x7b_perplexity_tests, timeout: 50, owner_id: U03PUAKE719}, # Miguel Tairum
]

name: ${{ matrix.test-group.name }}
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.test-group.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
environment: dev
# The runner labels are fixed for every matrix entry.
runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"]
steps:
# NOTE(review): the action reference below appears to have been mangled by
# e-mail obfuscation ("[email protected]"); verify the real action name and
# version tag against the repository before relying on this line.
- uses: tenstorrent-metal/metal-workflows/.github/actions/[email protected]
- uses: ./.github/actions/ensure-active-weka-mount
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
# Fetch the build artifact named after the matrix arch and unpack it.
- uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.test-group.arch }}
- name: Extract files
run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run perplexity tests
shell: bash {0}
# Per-model limit taken from the matrix entry.
timeout-minutes: ${{ matrix.test-group.timeout }}
# Activate the Python env, source the perplexity runner script, then invoke
# the single function selected by this matrix entry.
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_perplexity_tests.sh
${{ matrix.test-group.cmd }}
# Notify the matrix entry's owner on Slack only when an earlier step failed.
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
owner: ${{ matrix.test-group.owner_id }}
47 changes: 3 additions & 44 deletions .github/workflows/t3000-perplexity-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,48 +11,7 @@ jobs:
with:
arch: '["wormhole_b0"]'
secrets: inherit
t3000-accuracy-perplexity-tests:
t3000-model-perf-tests:
needs: build-artifact
strategy:
fail-fast: false
matrix:
test-group: [
{
name: t3k perplexity tests,
arch: wormhole_b0,
runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"],
},
]
name: ${{ matrix.test-group.name }}
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.test-group.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
environment: dev
runs-on: ${{ matrix.test-group.runs-on }}
steps:
- uses: tenstorrent-metal/metal-workflows/.github/actions/[email protected]
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.test-group.arch }}
- name: Extract files
run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run perplexity tests
shell: bash {0}
timeout-minutes: 480
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_perplexity_tests.sh
run_t3000_tests
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
owner: U05RWH3QUPM #Salar Khorasgani
secrets: inherit
uses: ./.github/workflows/t3000-perplexity-tests-impl.yaml
1 change: 1 addition & 0 deletions .github/workflows/t3000-unit-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
{ name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 30, owner_id: U053W15B6JF}, #Djordje Ivanovic
{ name: "t3k llama3-small tests", arch: wormhole_b0, cmd: run_t3000_llama3-small_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k llama3.2-11b tests", arch: wormhole_b0, cmd: run_t3000_llama3.2-11b_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k llama3.1-70b tests", arch: wormhole_b0, cmd: run_t3000_llama3.1-70b_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k grok tests", arch: wormhole_b0, cmd: run_t3000_grok_tests, timeout: 30, owner_id: U03HY7MK4BT}, #Mark O'Connor
{ name: "t3k unet shallow tests", arch: wormhole_b0, cmd: run_t3000_unet_shallow_tests, timeout: 30, owner_id: U06ECNVR0EN}, #Evan Smal
Expand Down
2 changes: 1 addition & 1 deletion models/demos/t3000/mixtral8x7b/tests/test_mixtral_topk.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def forward(self, x):
@pytest.mark.parametrize(
"iterations, expected_top1, expected_top5",
(
(64, 0.93, 0.99),
(64, 0.91, 0.99),
# (128, 0.92, 0.99),
# (256, 0.92, 0.99),
),
Expand Down
8 changes: 5 additions & 3 deletions tests/scripts/t3000/run_t3000_demo_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ run_t3000_llama3_70b_tests() {

echo "LOG_METAL: Running run_t3000_llama3_70b_tests"

# Llama3 70B demo (output verification)
env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/llama3_70b/demo/demo.py::test_LlamaModel_demo[wormhole_b0-True-device_params0-short_context-check_enabled-greedy-tt-70b-T3000-80L-decode_only-trace_mode_off-text_completion-llama3] --timeout=900 ; fail+=$?

LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-70B-Instruct/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/demo/demo.py --timeout 600; fail+=$?

# Record the end time
end_time=$(date +%s)
Expand Down Expand Up @@ -115,6 +113,10 @@ run_t3000_mixtral_tests() {
}

run_t3000_tests() {

# Run llama3 smaller tests (1B, 3B, 8B, 11B)
run_t3000_llama3_tests

# Run llama3_70b tests
run_t3000_llama3_70b_tests

Expand Down
27 changes: 27 additions & 0 deletions tests/scripts/t3000/run_t3000_frequent_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ run_t3000_ethernet_tests() {
fi
}

# TODO [Deprecation notice] - Llama2-70B will soon be deprecated in favor of the new Llama3-70B, and its CI tests will be removed along with it.
run_t3000_llama2_70b_tests() {
# Record the start time
fail=0
Expand Down Expand Up @@ -75,6 +76,26 @@ run_t3000_llama3_tests() {
fi
}

#######################################
# Run the Llama3.1-70B model tests (test_llama_model.py with `-k full`, then
# test_llama_model_prefill.py) and report how long they took.
# Globals:   fail, start_time, end_time, duration (written; left global as in
#            the original function)
# Outputs:   LOG_METAL progress lines on stdout
# Returns:   0 when both pytest runs succeed; otherwise terminates the shell
#            with `exit 1`
#######################################
run_t3000_llama3_70b_tests() {
    # Record the start time
    fail=0
    start_time=$(date +%s)

    echo "LOG_METAL: Running run_t3000_llama3_70b_tests"

    # Run test_model (decode and prefill) for llama3 70B.
    # Exit statuses are summed arithmetically. Appending them as strings
    # (fail+=$?) can produce values such as "0139", which bash rejects as an
    # invalid octal number in the check below, silently masking the failure.
    LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-70B-Instruct/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k full ; fail=$((fail + $?))
    LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-70B-Instruct/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model_prefill.py ; fail=$((fail + $?))

    # Record the end time
    end_time=$(date +%s)
    duration=$((end_time - start_time))
    echo "LOG_METAL: run_t3000_llama3_70b_tests $duration seconds to complete"
    if [[ $fail -ne 0 ]]; then
        exit 1
    fi
}

run_t3000_mixtral_tests() {
# Record the start time
fail=0
Expand Down Expand Up @@ -188,9 +209,15 @@ run_t3000_tests() {
# Run falcon40b tests
run_t3000_falcon40b_tests

# Run llama3 small (1B, 3B, 8B, 11B) tests
run_t3000_llama3_tests

# Run llama2-70b tests
run_t3000_llama2_70b_tests

# Run llama3-70b tests
run_t3000_llama3_70b_tests

# Run mixtral tests
run_t3000_mixtral_tests

Expand Down
25 changes: 25 additions & 0 deletions tests/scripts/t3000/run_t3000_model_perf_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ run_t3000_mixtral_tests() {
fi
}

# TODO [Deprecation notice] - Llama2-70B will soon be deprecated in favor of the new Llama3-70B, and its CI tests will be removed along with it.
run_t3000_llama2_70b_tests() {
# Record the start time
fail=0
Expand All @@ -55,6 +56,24 @@ run_t3000_llama2_70b_tests() {
fi
}

#######################################
# Run the Llama3.1-70B perf test (test_llama_perf.py) and report how long it
# took.
# Globals:   fail, start_time, end_time, duration (written; left global as in
#            the original function)
# Outputs:   LOG_METAL progress lines on stdout
# Returns:   0 when the pytest run succeeds; otherwise terminates the shell
#            with `exit 1`
#######################################
run_t3000_llama3_70b_tests() {
    # Record the start time
    fail=0
    start_time=$(date +%s)

    echo "LOG_METAL: Running run_t3000_llama3_70b_tests"

    # The exit status is added arithmetically. Appending it as a string
    # (fail+=$?) can produce values such as "0139", which bash rejects as an
    # invalid octal number in the check below, silently masking the failure.
    LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-70B-Instruct/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_perf.py ; fail=$((fail + $?))

    # Record the end time
    end_time=$(date +%s)
    duration=$((end_time - start_time))
    echo "LOG_METAL: run_t3000_llama3_70b_tests $duration seconds to complete"
    if [[ $fail -ne 0 ]]; then
        exit 1
    fi
}

run_t3000_llama3_tests() {
# Record the start time
fail=0
Expand Down Expand Up @@ -130,9 +149,15 @@ run_t3000_llm_tests() {
# Run mixtral tests
run_t3000_mixtral_tests

# Run llama3-small (1B, 3B, 8B, 11B) tests
run_t3000_llama3_tests

# Run llama2-70b tests
run_t3000_llama2_70b_tests

# Run llama3-70b tests
run_t3000_llama3_70b_tests

# Run falcon40b tests
run_t3000_falcon40b_tests

Expand Down
68 changes: 61 additions & 7 deletions tests/scripts/t3000/run_t3000_perplexity_tests.sh
Original file line number Diff line number Diff line change
@@ -1,40 +1,94 @@
#!/bin/bash

run_t3000_perplexity_tests() {
run_t3000_falcon7b_perplexity_tests() {
# Record the start time
fail=0
start_time=$(date +%s)

echo "LOG_METAL: Running run_t3000_perplexity_tests"
echo "LOG_METAL: Running run_t3000_falcon7b_perplexity_tests"

# Falcon7B perplexity tests
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon.py --timeout=1500 ; fail+=$?

# Record the end time
end_time=$(date +%s)
duration=$((end_time - start_time))
echo "LOG_METAL: run_t3000_falcon7b_perplexity_tests $duration seconds to complete"
if [[ $fail -ne 0 ]]; then
exit 1
fi
}

#######################################
# Run the Falcon40B perplexity test file and report how long it took.
# Globals:   fail, start_time, end_time, duration (written; left global as in
#            the original function)
# Outputs:   LOG_METAL progress lines on stdout
# Returns:   0 when the pytest run succeeds; otherwise terminates the shell
#            with `exit 1`
#######################################
run_t3000_falcon40b_perplexity_tests() {
    # Record the start time
    fail=0
    start_time=$(date +%s)

    echo "LOG_METAL: Running run_t3000_falcon40b_perplexity_tests"

    # Falcon40B perplexity tests.
    # The exit status is added arithmetically. Appending it as a string
    # (fail+=$?) can produce values such as "0139", which bash rejects as an
    # invalid octal number in the check below, silently masking the failure.
    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_perplexity_falcon.py --timeout=2100 ; fail=$((fail + $?))

    # Record the end time
    end_time=$(date +%s)
    duration=$((end_time - start_time))
    echo "LOG_METAL: run_t3000_falcon40b_perplexity_tests $duration seconds to complete"
    if [[ $fail -ne 0 ]]; then
        exit 1
    fi
}

#######################################
# Run the Llama-70B perplexity evaluation (llama2_70b/demo/eval_t3000.py) and
# report how long it took.
# Globals:   fail, start_time, end_time, duration (written; left global as in
#            the original function)
# Outputs:   LOG_METAL progress lines on stdout
# Returns:   0 when the pytest run succeeds; otherwise terminates the shell
#            with `exit 1`
#######################################
run_t3000_llama70b_perplexity_tests() {
    # Record the start time
    fail=0
    start_time=$(date +%s)

    echo "LOG_METAL: Running run_t3000_llama70b_perplexity_tests"

    # Llama-70B perplexity tests.
    # The exit status is added arithmetically. Appending it as a string
    # (fail+=$?) can produce values such as "0139", which bash rejects as an
    # invalid octal number in the check below, silently masking the failure.
    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/llama2_70b/demo/eval_t3000.py --timeout=7200 ; fail=$((fail + $?))

    # Record the end time
    end_time=$(date +%s)
    duration=$((end_time - start_time))
    echo "LOG_METAL: run_t3000_llama70b_perplexity_tests $duration seconds to complete"
    if [[ $fail -ne 0 ]]; then
        exit 1
    fi
}

run_t3000_mixtral8x7b_perplexity_tests() {
# Record the start time
fail=0
start_time=$(date +%s)

echo "LOG_METAL: Running run_t3000_mixtral8x7b_perplexity_tests"

# Mixtral8x7B perplexity tests
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_perplexity.py --timeout=3600 ; fail+=$?
# WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_perplexity.py --timeout=3600 ; fail+=$?
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_topk.py --timeout=3600 ; fail+=$?

# Record the end time
end_time=$(date +%s)
duration=$((end_time - start_time))
echo "LOG_METAL: run_t3000_perplexity_tests $duration seconds to complete"
echo "LOG_METAL: run_t3000_mixtral8x7b_perplexity_tests $duration seconds to complete"
if [[ $fail -ne 0 ]]; then
exit 1
fi
}


run_t3000_tests() {
# Run Falcon-7B perplexity tests
run_t3000_falcon7b_perplexity_tests

# Run perplexity tests
run_t3000_perplexity_tests
# Run Falcon-40B perplexity tests
run_t3000_falcon40b_perplexity_tests

# Run Llama-70B perplexity tests
run_t3000_llama70b_perplexity_tests

# Run Mixtral8x7B perplexity tests
run_t3000_mixtral8x7b_perplexity_tests
}

fail=0
Expand Down
Loading

0 comments on commit 3e70b34

Please sign in to comment.