Add all gather perf to TG pipeline
Aswinmcw committed Nov 13, 2024
1 parent f38e3ba commit 1519429
Showing 5 changed files with 63 additions and 11 deletions.
47 changes: 41 additions & 6 deletions .github/workflows/tg-model-perf-tests-impl.yaml
@@ -23,6 +23,13 @@ jobs:
runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tg_device --dispatch-mode ""'
},
{ name: "t3k CCL all_gather perf tests",
arch: wormhole_b0,
cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type ccl_all_gather_perf_tg_device --dispatch-mode ""',
timeout: 75,
tracy: true,
runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
owner_id: ULMEPM2MA}, # Sean Nijjar
]
name: ${{ matrix.test-group.name }}
env:
@@ -41,13 +48,25 @@ jobs:
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
- uses: actions/download-artifact@v4
- name: Download profiler build artifact
id: download-profiler-artifact
if: ${{ matrix.test-group.tracy }}
uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.test-group.arch }}_profiler
continue-on-error: true
- name: Download build artifact
id: download-artifact
if: ${{ !matrix.test-group.tracy }}
uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.test-group.arch }}
- name: Extract files
if: ${{ matrix.test-group.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-group.tracy }}
run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run model perf regression tests
if: ${{ matrix.test-group.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-group.tracy }}
timeout-minutes: 60
run: |
source ${{ github.workspace }}/python_env/bin/activate
@@ -56,12 +75,28 @@ jobs:
${{ matrix.test-group.cmd }}
- name: Check perf report exists
id: check-perf-report
if: ${{ !cancelled() }}
if: ${{ !cancelled() && (matrix.test-group.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-group.tracy) }}
run: |
ls -hal
export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
ls -hal $PERF_REPORT_FILENAME
echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
TODAY=$(date +%Y_%m_%d)
PERF_REPORT_FILENAME_MODELS="Models_Perf_${TODAY}.csv"
PERF_REPORT_FILENAME_CCL="CCL_Perf_${TODAY}.csv"
if [ "${{ matrix.test-info.tracy }}" == "true" ]; then
if [ -f "$PERF_REPORT_FILENAME_CCL" ]; then
echo "Found CCL Perf report: $PERF_REPORT_FILENAME_CCL"
echo "perf_report_filename=$PERF_REPORT_FILENAME_CCL" >> "$GITHUB_OUTPUT"
else
echo "No CCL perf report found for today."
exit 1
fi
else
if [ -f "$PERF_REPORT_FILENAME_MODELS" ]; then
echo "Found Models Perf report: $PERF_REPORT_FILENAME_MODELS"
echo "perf_report_filename=$PERF_REPORT_FILENAME_MODELS" >> "$GITHUB_OUTPUT"
else
echo "No Models perf report found for today."
exit 1
fi
fi
- name: Upload perf report
if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
uses: actions/upload-artifact@v4
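The reworked "Check perf report exists" step picks the expected CSV based on whether the job ran under the tracy profiler: tracy runs look for CCL_Perf_<date>.csv, non-tracy runs for Models_Perf_<date>.csv, and the step fails if the file is missing. A minimal local sketch of that selection logic, assuming the tests have already dropped their CSV into the working directory (the filenames mirror the workflow step; nothing below is part of the commit itself):

#!/usr/bin/env bash
# Sketch only: mimic the workflow's report-selection step outside CI.
set -euo pipefail

TRACY="${1:-true}"            # pass "true" to mimic a tracy/CCL run
TODAY="$(date +%Y_%m_%d)"

if [ "$TRACY" = "true" ]; then
    report="CCL_Perf_${TODAY}.csv"
else
    report="Models_Perf_${TODAY}.csv"
fi

if [ -f "$report" ]; then
    echo "Found perf report: $report"
else
    echo "No perf report found for today ($report)." >&2
    exit 1
fi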
8 changes: 7 additions & 1 deletion .github/workflows/tg-model-perf-tests.yaml
@@ -11,7 +11,13 @@ jobs:
with:
arch: '["wormhole_b0"]'
secrets: inherit
build-artifact-profiler:
uses: ./.github/workflows/build-artifact.yaml
with:
arch: '["wormhole_b0"]'
tracy: true
secrets: inherit
tg-model-perf-tests:
needs: build-artifact
needs: [build-artifact, build-artifact-profiler]
secrets: inherit
uses: ./.github/workflows/tg-model-perf-tests-impl.yaml
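The caller workflow now builds two artifacts, a plain build and a tracy-instrumented profiler build, and the perf job depends on both. To pull the same artifacts from a finished run locally, something like the following should work with the GitHub CLI, assuming build-artifact.yaml publishes them under the TTMetal_build_<arch> and TTMetal_build_<arch>_profiler names that the impl workflow downloads (the run id is a placeholder):

# Sketch: download both build artifacts from a completed workflow run.
RUN_ID="<run-id>"   # placeholder for a real workflow run id
gh run download "$RUN_ID" --name TTMetal_build_wormhole_b0
gh run download "$RUN_ID" --name TTMetal_build_wormhole_b0_profiler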
8 changes: 8 additions & 0 deletions tests/scripts/run_tests.sh
@@ -241,6 +241,12 @@ model_perf_tg_device() {

./tests/scripts/tg/run_tg_model_perf_tests.sh --pipeline-type "$pipeline_type"
}

# Run CCL perf tests
ccl_perf_tg_device() {

./tests/ttnn/unit_tests/operations/ccl/perf/run_all_gather_profile.sh -t tg
}
##########################TG##########################

##########################TGG##########################
@@ -321,6 +327,8 @@ run_pipeline_tests() {
demos_tg_device "$tt_arch" "$pipeline_type" "$dispatch_mode"
elif [[ $pipeline_type == *"model_perf_tg_device" ]]; then
model_perf_tg_device "$tt_arch" "$pipeline_type" "$dispatch_mode"
elif [[ $pipeline_type == "ccl_all_gather_perf_tg_device" ]]; then
ccl_perf_tg_device
# TGG pipelines
elif [[ $pipeline_type == "unit_tgg_device" ]]; then
unit_tgg_device "$tt_arch" "$pipeline_type" "$dispatch_mode"
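With the ccl_all_gather_perf_tg_device pipeline type wired into run_tests.sh, the job can also be reproduced by hand. A rough local invocation, assuming a TG machine with the repo built and the Python env activated, mirroring the cmd entry in the workflow matrix:

# Sketch: run the new CCL all_gather perf pipeline locally on a TG system.
export TT_METAL_HOME="$(pwd)"
export PYTHONPATH="$(pwd)"
./tests/scripts/run_tests.sh --tt-arch wormhole_b0 \
    --pipeline-type ccl_all_gather_perf_tg_device \
    --dispatch-mode ""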
6 changes: 4 additions & 2 deletions tests/ttnn/unit_tests/operations/ccl/perf/perf_csv.py
@@ -177,10 +177,12 @@ def calculate_bandwidth(row):
group_df.rename(columns={"INPUT_0_LAYOUT": "Layout", "INPUT_0_DATATYPE": "Data Type"}, inplace=True)

group_df["Input Shape"] = group_df.apply(
lambda row: f"[{row['INPUT_0_W']}, {row['INPUT_0_Z']}, {row['INPUT_0_Y']}, {row['INPUT_0_X']}]", axis=1
lambda row: f"[{int(row['INPUT_0_W'])}, {int(row['INPUT_0_Z'])}, {int(row['INPUT_0_Y'])}, {int(row['INPUT_0_X'])}]",
axis=1,
)
group_df["Output Shape"] = group_df.apply(
lambda row: f"[{row['OUTPUT_0_W']}, {row['OUTPUT_0_Z']}, {row['OUTPUT_0_Y']}, {row['OUTPUT_0_X']}]", axis=1
lambda row: f"[{int(row['OUTPUT_0_W'])}, {int(row['OUTPUT_0_Z'])}, {int(row['OUTPUT_0_Y'])}, {int(row['OUTPUT_0_X'])}]",
axis=1,
)
group_df["Cycles Count"] = group_df["DEVICE FW END CYCLE"] - group_df["DEVICE FW START CYCLE"]
group_df[["Op BW [GB/s]", "Link BW [GB/s]"]] = group_df.apply(calculate_bandwidth, axis=1, result_type="expand")
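The only change in perf_csv.py is wrapping the shape dimensions in int(), presumably because pandas parses those profiler CSV columns as floats, so the formatted shapes would otherwise read as [32.0, 1.0, ...]. A quick shell check illustrating the difference (the column names match the script; the values are made up):

# Sketch: why the int() cast matters when formatting pandas float columns.
python3 -c "
import pandas as pd
row = pd.DataFrame({'INPUT_0_W': [32.0], 'INPUT_0_Z': [1.0]}).iloc[0]
print(f\"[{row['INPUT_0_W']}, {row['INPUT_0_Z']}]\")            # [32.0, 1.0]
print(f\"[{int(row['INPUT_0_W'])}, {int(row['INPUT_0_Z'])}]\")  # [32, 1]
"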
5 changes: 3 additions & 2 deletions tests/ttnn/unit_tests/operations/ccl/perf/test_ccl_perf.py
@@ -295,9 +295,10 @@ def test_reduce_scatter_on_n300(
],
)
@pytest.mark.parametrize("replication_factor", [8])
@pytest.mark.parametrize("num_iters", [20])
@pytest.mark.parametrize("enable_async", [True])
@pytest.mark.parametrize("mesh_device", [pytest.param((8, 4), id="8x4_grid")], indirect=True)
@pytest.mark.parametrize("device_params", [{"trace_region_size": 266240}], indirect=True)
@pytest.mark.parametrize("device_params", [{"trace_region_size": 532480}], indirect=True)
def test_all_gather_on_tg(
mesh_device,
num_devices,
@@ -311,7 +312,7 @@ def test_all_gather_on_tg(
function_level_defaults,
enable_async,
replication_factor,
num_iters=1,
num_iters,
):
run_line_all_gather_on_TG_with_mesh_tensor_along_rows(
mesh_device,
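In the test itself, num_iters moves from a hard-coded default of 1 to a pytest parameter of 20, and trace_region_size is doubled to 532480, presumably to make room for the longer traced run. To exercise just this test outside the profiling wrapper, something like the following should work (the node id comes from the diff; any extra environment that run_all_gather_profile.sh sets up is not shown in this commit):

# Sketch: run the updated TG all_gather perf test directly with pytest.
pytest tests/ttnn/unit_tests/operations/ccl/perf/test_ccl_perf.py::test_all_gather_on_tg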
