Skip to content

Commit

Permalink
#9714: Separate stable diffusion into an unstable single-card pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-rkim committed Jun 28, 2024
1 parent 70836bc commit 982d64d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/perf-device-models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ jobs:
fail-fast: false
matrix:
test-info: [
{name: "GS", arch: grayskull, runs-on: ["perf-no-reset-grayskull", "self-reset"], machine-type: "bare_metal", timeout: 40},
{name: "GS", arch: grayskull, runs-on: ["perf-no-reset-grayskull", "self-reset"], machine-type: "bare_metal", model-type: "other", timeout: 40},
# Runs on virtual machine now
{name: "N300 WH B0", arch: wormhole_b0, runs-on: ["model-runner-wormhole_b0"], machine-type: "bare_metal", timeout: 60},
{name: "N300", arch: wormhole_b0, runs-on: ["model-runner-wormhole_b0"], machine-type: "bare_metal", model-type: "other", timeout: 30},
{name: "Unstable N300", arch: wormhole_b0, runs-on: ["model-runner-wormhole_b0"], machine-type: "bare_metal", model-type: "unstable_models", timeout: 15},
]
name: "${{ matrix.test-info.name }} device perf"
env:
Expand Down Expand Up @@ -44,7 +45,7 @@ jobs:
timeout-minutes: ${{ matrix.test-info.timeout }}
run: |
source python_env/bin/activate
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type models_device_performance_${{ matrix.test-info.machine-type }}
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.test-info.model-type }}_device_performance_${{ matrix.test-info.machine-type }}
- name: Check device perf report exists
id: check-device-perf-report
if: ${{ !cancelled() }}
Expand All @@ -57,5 +58,5 @@ jobs:
if: ${{ !cancelled() && steps.check-device-perf-report.conclusion == 'success' }}
uses: actions/upload-artifact@v4
with:
name: device-perf-report-csv-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }}
name: device-perf-report-csv-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }}-${{ matrix.test-info.model-type }}
path: "${{ steps.check-device-perf-report.outputs.device_perf_report_filename }}"
14 changes: 11 additions & 3 deletions tests/scripts/run_performance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ run_perf_models_cnn_javelin() {
run_device_perf_models() {
local test_marker=$1

env pytest tests/device_perf_tests/stable_diffusion -m $test_marker --timeout=600

if [ "$tt_arch" == "grayskull" ]; then
#TODO(MO): Until #6560 is fixed, GS device profiler tests are grouped with
#Model Device perf regression tests to make sure they run on no-soft-reset BMs
Expand Down Expand Up @@ -93,6 +91,14 @@ run_device_perf_models() {
env python models/perf/merge_device_perf_results.py
}

run_unstable_device_perf_models() {
    # Device-perf pipeline for models marked unstable: currently only the
    # stable diffusion suite lives here, so it can fail without blocking
    # the stable model pipelines.
    local marker="$1"

    env pytest tests/device_perf_tests/stable_diffusion -m $marker --timeout=600

    # Fold the generated per-test reports into a single merged report.
    env python models/perf/merge_device_perf_results.py
}
run_device_perf_ops() {
local test_marker=$1

Expand Down Expand Up @@ -140,7 +146,9 @@ main() {
exit 1
fi

if [[ "$pipeline_type" == *"device_performance"* ]]; then
if [[ "$pipeline_type" == "unstable_models_device_performance"* ]]; then
run_unstable_device_perf_models "$test_marker"
elif [[ "$pipeline_type" == "other_device_performance"* ]]; then
run_device_perf_models "$test_marker"
run_device_perf_ops "$test_marker"
elif [[ "$pipeline_type" == "llm_javelin_models_performance"* ]]; then
Expand Down

0 comments on commit 982d64d

Please sign in to comment.