-
Notifications
You must be signed in to change notification settings - Fork 92
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#0: Update perf tests to have tighter timeouts to reduce queue time and time spent waiting on hangs to timeout (#8552)
- Loading branch information
Showing 2 changed files with 18 additions and 18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,18 +13,18 @@ jobs: | |
# so we try not to get hanging machines | ||
fail-fast: false | ||
matrix: | ||
runner-info: [ | ||
{name: "GS", arch: grayskull, runs-on: ["perf-no-reset-grayskull", "self-reset"], machine-type: "bare_metal"}, | ||
{name: "N300 WH B0", arch: wormhole_b0, runs-on: ["perf-wormhole_b0", "self-reset"], machine-type: "bare_metal"}, | ||
test-info: [ | ||
{name: "GS", arch: grayskull, runs-on: ["perf-no-reset-grayskull", "self-reset"], machine-type: "bare_metal", timeout: 40}, | ||
{name: "N300 WH B0", arch: wormhole_b0, runs-on: ["perf-wormhole_b0", "self-reset"], machine-type: "bare_metal", timeout: 20}, | ||
] | ||
name: "${{ matrix.runner-info.name }} device perf" | ||
name: "${{ matrix.test-info.name }} device perf" | ||
env: | ||
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} | ||
ARCH_NAME: ${{ matrix.runner-info.arch }} | ||
ARCH_NAME: ${{ matrix.test-info.arch }} | ||
TTNN_CONFIG_OVERRIDES: '{"enable_fast_runtime_mode": true}' | ||
LOGURU_LEVEL: INFO | ||
environment: dev | ||
runs-on: ${{ matrix.runner-info.runs-on }} | ||
runs-on: ${{ matrix.test-info.runs-on }} | ||
steps: | ||
- uses: tenstorrent-metal/metal-workflows/.github/actions/[email protected] | ||
- name: Ensure weka mount is active | ||
|
@@ -39,10 +39,10 @@ jobs: | |
run: | | ||
./scripts/build_scripts/build_with_profiler_opt.sh | ||
- name: Run device performance regressions | ||
timeout-minutes: 90 | ||
timeout-minutes: ${{ matrix.test-info.timeout }} | ||
run: | | ||
source build/python_env/bin/activate | ||
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type models_device_performance_${{ matrix.runner-info.machine-type }} | ||
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type models_device_performance_${{ matrix.test-info.machine-type }} | ||
- name: Check device perf report exists | ||
id: check-device-perf-report | ||
if: ${{ !cancelled() }} | ||
|
@@ -55,5 +55,5 @@ jobs: | |
if: ${{ !cancelled() && steps.check-device-perf-report.conclusion == 'success' }} | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: device-perf-report-csv-${{ matrix.runner-info.arch }}-${{ matrix.runner-info.machine-type }} | ||
name: device-perf-report-csv-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }} | ||
path: "${{ steps.check-device-perf-report.outputs.device_perf_report_filename }}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,19 +17,19 @@ jobs: | |
# so we try not to get hanging machines | ||
fail-fast: false | ||
matrix: | ||
runner-info: [ | ||
test-info: [ | ||
{name: "GS", arch: grayskull, runs-on: ["perf-grayskull", "self-reset"], machine-type: "bare_metal"}, | ||
{name: "N300 WH B0", arch: wormhole_b0, runs-on: ["perf-wormhole_b0", "self-reset"], machine-type: "bare_metal"}, | ||
] | ||
model-type: [llm_javelin, cnn_javelin, other] | ||
name: "${{ matrix.model-type }} ${{ matrix.runner-info.name }}" | ||
name: "${{ matrix.model-type }} ${{ matrix.test-info.name }}" | ||
env: | ||
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} | ||
ARCH_NAME: ${{ matrix.runner-info.arch }} | ||
ARCH_NAME: ${{ matrix.test-info.arch }} | ||
LOGURU_LEVEL: INFO | ||
TTNN_CONFIG_OVERRIDES: '{"enable_fast_runtime_mode": true}' | ||
environment: dev | ||
runs-on: ${{ matrix.runner-info.runs-on }} | ||
runs-on: ${{ matrix.test-info.runs-on }} | ||
steps: | ||
- uses: tenstorrent-metal/metal-workflows/.github/actions/[email protected] | ||
- name: Enable Performance mode | ||
|
@@ -46,16 +46,16 @@ jobs: | |
echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV | ||
- uses: actions/download-artifact@v4 | ||
with: | ||
name: TTMetal_build_${{ matrix.runner-info.arch }} | ||
name: TTMetal_build_${{ matrix.test-info.arch }} | ||
- name: Extract files | ||
run: tar -xvf ttm_${{ matrix.runner-info.arch }}.tar | ||
run: tar -xvf ttm_${{ matrix.test-info.arch }}.tar | ||
- uses: ./.github/actions/install-python-deps | ||
- name: Run performance regressions | ||
id: performance_tests | ||
timeout-minutes: 60 | ||
timeout-minutes: 30 | ||
run: | | ||
source ${{ github.workspace }}/python_env/bin/activate | ||
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.model-type }}_models_performance_${{ matrix.runner-info.machine-type }} | ||
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.model-type }}_models_performance_${{ matrix.test-info.machine-type }} | ||
- uses: ./.github/actions/slack-report | ||
if: ${{ steps.performance_tests.outcome != 'success' }} | ||
with: | ||
|
@@ -72,7 +72,7 @@ jobs: | |
if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }} | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: perf-report-csv-${{ matrix.model-type }}-${{ matrix.runner-info.arch }}-${{ matrix.runner-info.machine-type }} | ||
name: perf-report-csv-${{ matrix.model-type }}-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }} | ||
path: "${{ steps.check-perf-report.outputs.perf_report_filename }}" | ||
- name: Disable Performance mode | ||
if: always() | ||
|