#13368: Add llaama3-70b to CI tests. Revamped CI perplexity test (TOD…

…O: add llama3 to topk/perplexity)
tenstorrent · Oct 25, 2024 · 3e70b34 · 3e70b34
1 parent 0bf386b
commit 3e70b34
Show file tree

Hide file tree

Showing 12 changed files with 220 additions and 61 deletions.
diff --git a/.github/workflows/t3000-demo-tests-impl.yaml b/.github/workflows/t3000-demo-tests-impl.yaml
@@ -10,8 +10,8 @@ jobs:
       matrix:
         test-group: [
           { name: "t3k_falcon40b_tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 50, owner_id: U053W15B6JF}, #Djordje Ivanovic
-          { name: "t3k_llama3_70b_tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
           { name: "t3k_llama3_tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 30, owner_id: U03PUAKE719}, # Miguel Tairum
+          { name: "t3k_llama3_70b_tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
           { name: "t3k_falcon7b_tests", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 90, owner_id: U05RWH3QUPM}, #Salar Hosseini
           { name: "t3k_mixtral_tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 50, owner_id: U03PUAKE719}, # Miguel Tairum
         ]

diff --git a/.github/workflows/t3000-frequent-tests-impl.yaml b/.github/workflows/t3000-frequent-tests-impl.yaml
@@ -13,8 +13,9 @@ jobs:
           { name: "t3k ethernet tests", arch: wormhole_b0, cmd: run_t3000_ethernet_tests, timeout: 60, owner_id: ULMEPM2MA}, #Sean Nijjar
           { name: "t3k trace stress tests", arch: wormhole_b0, cmd: run_t3000_trace_stress_tests, timeout: 120, owner_id: U03NG0A5ND7}, #Aditya Saigal
           { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 120, owner_id: U04S2UV6L8N}, #Sofija Jovic
-          { name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
-          { name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
+          { name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
+          { name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
+          { name: "t3k llama3_70b tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
           { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
           { name: "t3k resnet tests", arch: wormhole_b0, cmd: run_t3000_resnet_tests, timeout: 30, owner_id: U013121KDH9}, #Austin Ho
         ]

diff --git a/.github/workflows/t3000-model-perf-tests-impl.yaml b/.github/workflows/t3000-model-perf-tests-impl.yaml
@@ -12,8 +12,9 @@ jobs:
         test-group: [
           { name: "t3k LLM falcon7b model perf tests", model: "falcon7b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 75, owner_id: U05RWH3QUPM}, # Salar Hosseini
           { name: "t3k LLM mixtral model perf tests", model: "mixtral", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 75, owner_id: U03PUAKE719}, # Miguel Tairum
-          { name: "t3k LLM llama2 model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 75, owner_id: U03FJB5TM5Y}, # Colman Glagovich
-          { name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 75, owner_id: U03PUAKE719}, # Miguel Tairum
+          { name: "t3k LLM llama2-70B model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 75, owner_id: U03FJB5TM5Y}, # Colman Glagovich
+          { name: "t3k LLM llama3-70B model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 60, owner_id: U03FJB5TM5Y}, # Colman Glagovich
+          { name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, # Miguel Tairum
           { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic
           { name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho
           #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run?

diff --git a/.github/workflows/t3000-perplexity-tests-impl.yaml b/.github/workflows/t3000-perplexity-tests-impl.yaml
@@ -0,0 +1,52 @@
+name: "[internal] T3000 perplexity tests impl"
+
+on:
+  workflow_call:
+
+jobs:
+
+  t3000-accuracy-perplexity-tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        test-group: [
+          { name: "t3k_falcon7b_tests", arch: wormhole_b0, cmd: run_t3000_falcon7b_perplexity_tests, timeout: 480, owner_id: U05RWH3QUPM}, #Salar Hosseini
+          { name: "t3k_falcon40b_tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_perplexity_tests, timeout: 49, owner_id: U05RWH3QUPM}, #Salar Hosseini
+          { name: "t3k_llama_70b_tests", arch: wormhole_b0, cmd: run_t3000_llama70b_perplexity_tests, timeout: 30, owner_id: U05RWH3QUPM}, #Salar Hosseini
+          { name: "t3k_mixtral_tests", arch: wormhole_b0, cmd: run_t3000_mixtral8x7b_perplexity_tests, timeout: 50, owner_id: U03PUAKE719}, # Miguel Tairum
+        ]
+
+    name: ${{ matrix.test-group.name }}
+    env:
+      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
+      ARCH_NAME: ${{ matrix.test-group.arch }}
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+    environment: dev
+    runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"]
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/[email protected]
+      - uses: ./.github/actions/ensure-active-weka-mount
+      - name: Set up dynamic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
+      - uses: actions/download-artifact@v4
+        with:
+          name: TTMetal_build_${{ matrix.test-group.arch }}
+      - name: Extract files
+        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
+      - uses: ./.github/actions/install-python-deps
+      - name: Run perplexity tests
+        shell: bash {0}
+        timeout-minutes: ${{ matrix.test-group.timeout }}
+        run: |
+          source ${{ github.workspace }}/python_env/bin/activate
+          cd $TT_METAL_HOME
+          export PYTHONPATH=$TT_METAL_HOME
+          source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_perplexity_tests.sh
+          ${{ matrix.test-group.cmd }}
+      - uses: ./.github/actions/slack-report
+        if: ${{ failure() }}
+        with:
+          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
+          owner: ${{ matrix.test-group.owner_id }}
diff --git a/.github/workflows/t3000-perplexity-tests.yaml b/.github/workflows/t3000-perplexity-tests.yaml
@@ -11,48 +11,7 @@ jobs:
     with:
       arch: '["wormhole_b0"]'
     secrets: inherit
-  t3000-accuracy-perplexity-tests:
+  t3000-model-perf-tests:
     needs: build-artifact
-    strategy:
-      fail-fast: false
-      matrix:
-        test-group: [
-          {
-            name: t3k perplexity tests,
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"],
-          },
-        ]
-    name: ${{ matrix.test-group.name }}
-    env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
-      ARCH_NAME: ${{ matrix.test-group.arch }}
-      LOGURU_LEVEL: INFO
-      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
-    environment: dev
-    runs-on: ${{ matrix.test-group.runs-on }}
-    steps:
-      - uses: tenstorrent-metal/metal-workflows/.github/actions/[email protected]
-      - name: Set up dynamic env vars for build
-        run: |
-          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
-        with:
-          name: TTMetal_build_${{ matrix.test-group.arch }}
-      - name: Extract files
-        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
-      - uses: ./.github/actions/install-python-deps
-      - name: Run perplexity tests
-        shell: bash {0}
-        timeout-minutes: 480
-        run: |
-          source ${{ github.workspace }}/python_env/bin/activate
-          cd $TT_METAL_HOME
-          export PYTHONPATH=$TT_METAL_HOME
-          source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_perplexity_tests.sh
-          run_t3000_tests
-      - uses: ./.github/actions/slack-report
-        if: ${{ failure() }}
-        with:
-          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
-          owner: U05RWH3QUPM #Salar Khorasgani
+    secrets: inherit
+    uses: ./.github/workflows/t3000-perplexity-tests-impl.yaml
diff --git a/.github/workflows/t3000-unit-tests-impl.yaml b/.github/workflows/t3000-unit-tests-impl.yaml
@@ -15,6 +15,7 @@ jobs:
           { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 30, owner_id: U053W15B6JF}, #Djordje Ivanovic
           { name: "t3k llama3-small tests", arch: wormhole_b0, cmd: run_t3000_llama3-small_tests, timeout: 30, owner_id: U03PUAKE719},  #Miguel Tairum Cruz
           { name: "t3k llama3.2-11b tests", arch: wormhole_b0, cmd: run_t3000_llama3.2-11b_tests, timeout: 30, owner_id: U03PUAKE719},  #Miguel Tairum Cruz
+          { name: "t3k llama3.1-70b tests", arch: wormhole_b0, cmd: run_t3000_llama3.1-70b_tests, timeout: 30, owner_id: U03PUAKE719},  #Miguel Tairum Cruz
           { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
           { name: "t3k grok tests", arch: wormhole_b0, cmd: run_t3000_grok_tests, timeout: 30, owner_id: U03HY7MK4BT}, #Mark O'Connor
           { name: "t3k unet shallow tests", arch: wormhole_b0, cmd: run_t3000_unet_shallow_tests, timeout: 30, owner_id: U06ECNVR0EN}, #Evan Smal

diff --git a/models/demos/t3000/mixtral8x7b/tests/test_mixtral_topk.py b/models/demos/t3000/mixtral8x7b/tests/test_mixtral_topk.py
@@ -29,7 +29,7 @@ def forward(self, x):
 @pytest.mark.parametrize(
     "iterations, expected_top1, expected_top5",
     (
-        (64, 0.93, 0.99),
+        (64, 0.91, 0.99),
         # (128, 0.92, 0.99),
         # (256, 0.92, 0.99),
     ),

diff --git a/tests/scripts/t3000/run_t3000_demo_tests.sh b/tests/scripts/t3000/run_t3000_demo_tests.sh
@@ -30,9 +30,7 @@ run_t3000_llama3_70b_tests() {
 
   echo "LOG_METAL: Running run_t3000_llama3_70b_tests"
 
-  # Llama3 70B demo (output verification)
-  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/llama3_70b/demo/demo.py::test_LlamaModel_demo[wormhole_b0-True-device_params0-short_context-check_enabled-greedy-tt-70b-T3000-80L-decode_only-trace_mode_off-text_completion-llama3] --timeout=900 ; fail+=$?
-
+  LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-70B-Instruct/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/demo/demo.py --timeout 600; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -115,6 +113,10 @@ run_t3000_mixtral_tests() {
 }
 
 run_t3000_tests() {
+
+  # Run llama3 smaller tests (1B, 3B, 8B, 11B)
+  run_t3000_llama3_tests
+
   # Run llama3_70b tests
   run_t3000_llama3_70b_tests
 

diff --git a/tests/scripts/t3000/run_t3000_frequent_tests.sh b/tests/scripts/t3000/run_t3000_frequent_tests.sh
@@ -21,6 +21,7 @@ run_t3000_ethernet_tests() {
   fi
 }
 
+# TODO [Deprecation notice] - Llama2-70B will be deprecated soon for the new Llama3-70B. The CI tests will be deprecated with it.
 run_t3000_llama2_70b_tests() {
   # Record the start time
   fail=0
@@ -75,6 +76,26 @@ run_t3000_llama3_tests() {
   fi
 }
 
+run_t3000_llama3_70b_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_llama3_70b_tests"
+
+  # Run test_model (decode and prefill) for llama3 70B
+  LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-70B-Instruct/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model.py -k full ; fail+=$?
+  LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-70B-Instruct/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_model_prefill.py ; fail+=$?
+
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_llama3_70b_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
 run_t3000_mixtral_tests() {
   # Record the start time
   fail=0
@@ -188,9 +209,15 @@ run_t3000_tests() {
   # Run falcon40b tests
   run_t3000_falcon40b_tests
 
+  # Run llama3 small (1B, 3B, 8B, 11B) tests
+  run_t3000_llama3_tests
+
   # Run llama2-70b tests
   run_t3000_llama2_70b_tests
 
+  # Run llama3-70b tests
+  run_t3000_llama3_70b_tests
+
   # Run mixtral tests
   run_t3000_mixtral_tests
 

diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
@@ -37,6 +37,7 @@ run_t3000_mixtral_tests() {
   fi
 }
 
+# TODO [Deprecation notice] - Llama2-70B will be deprecated soon for the new Llama3-70B. The CI tests will be deprecated with it.
 run_t3000_llama2_70b_tests() {
   # Record the start time
   fail=0
@@ -55,6 +56,24 @@ run_t3000_llama2_70b_tests() {
   fi
 }
 
+run_t3000_llama3_70b_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_llama3_70b_tests"
+
+  LLAMA_DIR=/mnt/MLPerf/tt_dnn-models/llama/Meta-Llama-3.1-70B-Instruct/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/llama3/tests/test_llama_perf.py ; fail+=$?
+
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_llama3_70b_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
 run_t3000_llama3_tests() {
   # Record the start time
   fail=0
@@ -130,9 +149,15 @@ run_t3000_llm_tests() {
   # Run mixtral tests
   run_t3000_mixtral_tests
 
+  # Run llama3-small (1B, 3B, 8B, 11B) tests
+  run_t3000_llama3_tests
+
   # Run llama2-70b tests
   run_t3000_llama2_70b_tests
 
+  # Run llama3-70b tests
+  run_t3000_llama3_70b_tests
+
   # Run falcon40b tests
   run_t3000_falcon40b_tests
 

diff --git a/tests/scripts/t3000/run_t3000_perplexity_tests.sh b/tests/scripts/t3000/run_t3000_perplexity_tests.sh
@@ -1,40 +1,94 @@
 #!/bin/bash
 
-run_t3000_perplexity_tests() {
+run_t3000_falcon7b_perplexity_tests() {
   # Record the start time
   fail=0
   start_time=$(date +%s)
 
-  echo "LOG_METAL: Running run_t3000_perplexity_tests"
+  echo "LOG_METAL: Running run_t3000_falcon7b_perplexity_tests"
 
   # Falcon7B perplexity tests
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon.py --timeout=1500 ; fail+=$?
 
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_falcon7b_perplexity_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
+run_t3000_falcon40b_perplexity_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_falcon40b_perplexity_tests"
+
   # Falcon40B perplexity tests
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_perplexity_falcon.py --timeout=2100 ; fail+=$?
 
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_falcon40b_perplexity_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
+run_t3000_llama70b_perplexity_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_llama70b_perplexity_tests"
+
   # Llama-70B perplexity tests
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/llama2_70b/demo/eval_t3000.py --timeout=7200 ; fail+=$?
 
+  # Record the end time
+  end_time=$(date +%s)
+  duration=$((end_time - start_time))
+  echo "LOG_METAL: run_t3000_llama70b_perplexity_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
+}
+
+run_t3000_mixtral8x7b_perplexity_tests() {
+  # Record the start time
+  fail=0
+  start_time=$(date +%s)
+
+  echo "LOG_METAL: Running run_t3000_mixtral8x7b_perplexity_tests"
+
   # Mixtral8x7B perplexity tests
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_perplexity.py --timeout=3600 ; fail+=$?
+  # WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_perplexity.py --timeout=3600 ; fail+=$?
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_topk.py --timeout=3600 ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
   duration=$((end_time - start_time))
-  echo "LOG_METAL: run_t3000_perplexity_tests $duration seconds to complete"
+  echo "LOG_METAL: run_t3000_mixtral8x7b_perplexity_tests $duration seconds to complete"
   if [[ $fail -ne 0 ]]; then
     exit 1
   fi
 }
 
-
 run_t3000_tests() {
+  # Run Falcon-7B perplexity tests
+  run_t3000_falcon7b_perplexity_tests
 
-  # Run perplexity tests
-  run_t3000_perplexity_tests
+  # Run Falcon-40B perplexity tests
+  run_t3000_falcon40b_perplexity_tests
+
+  # Run Llama-70B perplexity tests
+  run_t3000_llama70b_perplexity_tests
 
+  # Run Mixtral8x7B perplexity tests
+  run_t3000_mixtral8x7b_perplexity_tests
 }
 
 fail=0