Skip to content

Commit

Permalink
Add N300 perf to pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Aswinmcw committed Nov 11, 2024
1 parent fbc8a9d commit a47700c
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 7 deletions.
43 changes: 37 additions & 6 deletions .github/workflows/perf-models-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ jobs:
{name: "N300 WH B0", arch: wormhole_b0, runs-on: ["N300", "pipeline-perf", "bare-metal", "in-service"], machine-type: "bare_metal"},
]
model-type: [llm_javelin, cnn_javelin, other]
include:
- test-info: {name: "N300 Perf tests", arch: wormhole_b0, runs-on: ["N300", "pipeline-perf", "bare-metal", "in-service"], machine-type: "bare_metal", tracy: true, owner_id: ULMEPM2MA} # Sean Nijjar
model-type: CCL
name: "${{ matrix.model-type }} ${{ matrix.test-info.name }}"
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
Expand All @@ -32,14 +35,26 @@ jobs:
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
- uses: actions/download-artifact@v4
- name: Download profiler build artifact
id: download-profiler-artifact
if: ${{ matrix.test-info.tracy }}
uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.test-info.arch }}_profiler
continue-on-error: true
- name: Download build artifact
id: download-artifact
if: ${{ !matrix.test-info.tracy }}
uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.test-info.arch }}
- name: Extract files
if: ${{ matrix.test-info.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-info.tracy }}
run: tar -xvf ttm_${{ matrix.test-info.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run performance regressions
id: performance_tests
if: ${{ matrix.test-info.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-info.tracy }}
timeout-minutes: 70
run: |
source ${{ github.workspace }}/python_env/bin/activate
Expand All @@ -51,12 +66,28 @@ jobs:
# slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
- name: Check perf report exists
id: check-perf-report
if: ${{ !cancelled() }}
if: ${{ !cancelled() && (matrix.test-info.tracy && steps.download-profiler-artifact.outcome == 'success' || !matrix.test-info.tracy) }}
run: |
ls -hal
export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
ls -hal $PERF_REPORT_FILENAME
echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
TODAY=$(date +%Y_%m_%d)
PERF_REPORT_FILENAME_MODELS="Models_Perf_${TODAY}.csv"
PERF_REPORT_FILENAME_CCL="CCL_Perf_${TODAY}.csv"
if [ "${{ matrix.test-info.tracy }}" == "true" ]; then
if [ -f "$PERF_REPORT_FILENAME_CCL" ]; then
echo "Found CCL Perf report: $PERF_REPORT_FILENAME_CCL"
echo "perf_report_filename=$PERF_REPORT_FILENAME_CCL" >> "$GITHUB_OUTPUT"
else
echo "No CCL perf report found for today."
exit 1
fi
else
if [ -f "$PERF_REPORT_FILENAME_MODELS" ]; then
echo "Found Models Perf report: $PERF_REPORT_FILENAME_MODELS"
echo "perf_report_filename=$PERF_REPORT_FILENAME_MODELS" >> "$GITHUB_OUTPUT"
else
echo "No Models perf report found for today."
exit 1
fi
fi
- name: Upload perf report
if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
uses: actions/upload-artifact@v4
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/perf-models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@ jobs:
build-artifact:
uses: ./.github/workflows/build-artifact.yaml
secrets: inherit
build-artifact-profiler:
uses: ./.github/workflows/build-artifact.yaml
with:
arch: '["wormhole_b0"]'
tracy: true
secrets: inherit
models-perf:
needs: build-artifact
needs: [build-artifact, build-artifact-profiler]
uses: ./.github/workflows/perf-models-impl.yaml
secrets: inherit
21 changes: 21 additions & 0 deletions tests/scripts/run_performance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,25 @@ run_perf_models_other() {
env python models/perf/merge_perf_results.py
}

# Run the N300 CCL all-gather perf profiling script and log how long it took.
#
# Arguments: none.
# Outputs:   LOG_METAL progress lines on stdout (plus whatever the script prints).
# Exits:     calls `exit 1` (terminating the current shell) when the profiling
#            script returns a non-zero status; otherwise falls through normally.
run_n300_ccl_all_gather_perf_tests() {
    # Record the start time
    fail=0
    start_time=$(date +%s)

    echo "LOG_METAL: Running run_n300_ccl_all_gather_perf_tests"

    # Accumulate the status arithmetically. `fail+=$?` on a plain (non-integer)
    # variable appends text, so a status of 8 would become the string "08";
    # the numeric test below rejects that as an invalid octal constant and the
    # failure would be silently ignored.
    tests/ttnn/unit_tests/operations/ccl/perf/run_all_gather_profile.sh -t n300
    fail=$((fail + $?))

    # Record the end time
    end_time=$(date +%s)
    duration=$((end_time - start_time))
    echo "LOG_METAL: run_n300_ccl_all_gather_perf_tests $duration seconds to complete"

    # Report the duration first, then fail the whole run if the script failed.
    if [[ $fail -ne 0 ]]; then
        exit 1
    fi
}

run_perf_models_llm_javelin() {
local tt_arch=$1
local test_marker=$2
Expand Down Expand Up @@ -182,6 +201,8 @@ main() {
run_perf_models_cnn_javelin "$tt_arch" "$test_marker"
elif [[ "$pipeline_type" == *"other_models_performance"* ]]; then
run_perf_models_other "$tt_arch" "$test_marker"
elif [[ "$pipeline_type" == "CCL_models_performance"* ]]; then
run_n300_ccl_all_gather_perf_tests
else
echo "$pipeline_type is not recoognized performance pipeline" 2>&1
exit 1
Expand Down

0 comments on commit a47700c

Please sign in to comment.