From 70836bc42ae5666b95baf32024865864a77a0de3 Mon Sep 17 00:00:00 2001
From: Raymond Kim
Date: Fri, 28 Jun 2024 10:12:52 -0400
Subject: [PATCH 1/3] #9714: Move WH device perf to a virtual machine with a higher timeout to see how things go

---
 .github/workflows/perf-device-models.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/perf-device-models.yaml b/.github/workflows/perf-device-models.yaml
index 97717bf7382..bf27110ff3a 100644
--- a/.github/workflows/perf-device-models.yaml
+++ b/.github/workflows/perf-device-models.yaml
@@ -1,4 +1,4 @@
-name: "Device perf regressions and output report"
+name: "(Single-card) Device perf tests"
 
 on:
   workflow_dispatch:
@@ -15,7 +15,8 @@ jobs:
       matrix:
         test-info: [
           {name: "GS", arch: grayskull, runs-on: ["perf-no-reset-grayskull", "self-reset"], machine-type: "bare_metal", timeout: 40},
-          {name: "N300 WH B0", arch: wormhole_b0, runs-on: ["perf-wormhole_b0", "self-reset"], machine-type: "bare_metal", timeout: 30},
+          # Runs on virtual machine now
+          {name: "N300 WH B0", arch: wormhole_b0, runs-on: ["model-runner-wormhole_b0"], machine-type: "bare_metal", timeout: 60},
         ]
     name: "${{ matrix.test-info.name }} device perf"
     env:
From 982d64d47cdc1df5e8e21d8d4f4396a9d66bce9e Mon Sep 17 00:00:00 2001
From: Raymond Kim
Date: Fri, 28 Jun 2024 11:08:04 -0400
Subject: [PATCH 2/3] #9714: Separate stable diffusion into an unstable single-card pipeline

---
 .github/workflows/perf-device-models.yaml |  9 +++++----
 tests/scripts/run_performance.sh          | 14 +++++++++++---
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/perf-device-models.yaml b/.github/workflows/perf-device-models.yaml
index bf27110ff3a..18402655ac7 100644
--- a/.github/workflows/perf-device-models.yaml
+++ b/.github/workflows/perf-device-models.yaml
@@ -14,9 +14,10 @@ jobs:
       fail-fast: false
       matrix:
         test-info: [
-          {name: "GS", arch: grayskull, runs-on: ["perf-no-reset-grayskull", "self-reset"], machine-type: "bare_metal", timeout: 40},
+          {name: "GS", arch: grayskull, runs-on: ["perf-no-reset-grayskull", "self-reset"], machine-type: "bare_metal", model-type: "other", timeout: 40},
           # Runs on virtual machine now
-          {name: "N300 WH B0", arch: wormhole_b0, runs-on: ["model-runner-wormhole_b0"], machine-type: "bare_metal", timeout: 60},
+          {name: "N300", arch: wormhole_b0, runs-on: ["model-runner-wormhole_b0"], machine-type: "bare_metal", model-type: "other", timeout: 30},
+          {name: "Unstable N300", arch: wormhole_b0, runs-on: ["model-runner-wormhole_b0"], machine-type: "bare_metal", model-type: "unstable_models", timeout: 15},
         ]
     name: "${{ matrix.test-info.name }} device perf"
     env:
@@ -44,7 +45,7 @@
         timeout-minutes: ${{ matrix.test-info.timeout }}
         run: |
           source python_env/bin/activate
-          ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type models_device_performance_${{ matrix.test-info.machine-type }}
+          ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.test-info.model-type }}_device_performance_${{ matrix.test-info.machine-type }}
       - name: Check device perf report exists
         id: check-device-perf-report
         if: ${{ !cancelled() }}
@@ -57,5 +58,5 @@
         if: ${{ !cancelled() && steps.check-device-perf-report.conclusion == 'success' }}
         uses: actions/upload-artifact@v4
         with:
-          name: device-perf-report-csv-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }}
+          name: device-perf-report-csv-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }}-${{ matrix.test-info.model-type }}
           path: "${{ steps.check-device-perf-report.outputs.device_perf_report_filename }}"
diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh
index 754bcbc9ab1..5e482dac434 100755
--- a/tests/scripts/run_performance.sh
+++ b/tests/scripts/run_performance.sh
@@ -60,8 +60,6 @@ run_perf_models_cnn_javelin() {
 run_device_perf_models() {
     local test_marker=$1
 
-    env pytest tests/device_perf_tests/stable_diffusion -m $test_marker --timeout=600
-
     if [ "$tt_arch" == "grayskull" ]; then
         #TODO(MO): Until #6560 is fixed, GS device profiler test are grouped with
         #Model Device perf regression tests to make sure thy run on no-soft-reset BMs
@@ -93,6 +91,14 @@ run_device_perf_models() {
     env python models/perf/merge_device_perf_results.py
 }
 
+run_unstable_device_perf_models() {
+    local test_marker=$1
+
+    env pytest tests/device_perf_tests/stable_diffusion -m $test_marker --timeout=600
+
+    ## Merge all the generated reports
+    env python models/perf/merge_device_perf_results.py
+}
 run_device_perf_ops() {
     local test_marker=$1
 
@@ -140,7 +146,9 @@ main() {
         exit 1
     fi
 
-    if [[ "$pipeline_type" == *"device_performance"* ]]; then
+    if [[ "$pipeline_type" == "unstable_models_device_performance"* ]]; then
+        run_unstable_device_perf_models "$test_marker"
+    elif [[ "$pipeline_type" == "other_device_performance"* ]]; then
         run_device_perf_models "$test_marker"
         run_device_perf_ops "$test_marker"
     elif [[ "$pipeline_type" == "llm_javelin_models_performance"* ]]; then
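Note on patch 2 (illustrative, not part of the patches): the new model-type matrix field is what selects the run_performance.sh path, because the run step now composes the pipeline type as <model-type>_device_performance_<machine-type>. A minimal bash sketch of that composition, using values taken from the matrix above:

    # Sketch only: mirrors the workflow expression
    #   ${{ matrix.test-info.model-type }}_device_performance_${{ matrix.test-info.machine-type }}
    model_type="unstable_models"   # "other" for the GS and N300 jobs
    machine_type="bare_metal"
    pipeline_type="${model_type}_device_performance_${machine_type}"
    echo "$pipeline_type"          # unstable_models_device_performance_bare_metal

With the dispatch added to run_performance.sh, other_device_performance_bare_metal runs run_device_perf_models plus run_device_perf_ops, while unstable_models_device_performance_bare_metal runs only run_unstable_device_perf_models (currently just the stable diffusion device perf tests).
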
"models_performance_virtual_machine" ]]; then run_models_performance_virtual_machine_pipeline_tests "$tt_arch" "$pipeline_type"