# (Single-card) Device perf regressions #1832
# Workflow file for this run.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Single-card device performance regression workflow.
#
# For each entry in the `test-info` matrix this job checks out the repository,
# builds tt-metal with the profiler-enabled build script, runs the device
# performance pipeline selected by the entry's model-type / machine-type, and
# uploads the dated CSV report as an artifact.
name: "(Single-card) Device perf tests"

on:
  workflow_dispatch:
  schedule:
    # Minute 0 of hours 2, 7, 10, 14, 17, 20 and 23, every day.
    - cron: "0 2,7,10,14,17,20,23 * * *"
  workflow_call:

jobs:
  device-perf:
    strategy:
      # Do not fail-fast because we need to ensure all tests go to completion
      # so we try not to get hanging machines
      fail-fast: false
      matrix:
        # Each entry supplies: display name, ARCH_NAME value, runner labels,
        # the two halves of the pipeline-type name, and the test step timeout
        # in minutes.
        test-info:
          - name: "GS"
            arch: grayskull
            runs-on: ["perf-no-reset-grayskull", "self-reset"]
            machine-type: "bare_metal"
            model-type: "other"
            timeout: 40
          # Runs on virtual machine now
          - name: "N300"
            arch: wormhole_b0
            runs-on: ["model-runner-wormhole_b0"]
            machine-type: "bare_metal"
            model-type: "other"
            timeout: 30
          - name: "Unstable N300"
            arch: wormhole_b0
            runs-on: ["model-runner-wormhole_b0"]
            machine-type: "bare_metal"
            model-type: "unstable_models"
            timeout: 15
    name: "${{ matrix.test-info.name }} device perf"
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.test-info.arch }}
      # JSON string; single-quoted so the inner double quotes stay literal.
      TTNN_CONFIG_OVERRIDES: '{"enable_fast_runtime_mode": true}'
      LOGURU_LEVEL: INFO
    environment: dev
    runs-on: ${{ matrix.test-info.runs-on }}
    steps:
      # NOTE(review): the action reference after `actions/` was replaced by an
      # e-mail-obfuscation placeholder in the copy this file was recovered
      # from. It is restored here as `checkout-with-submodule-lfs@v2.0.0` -
      # confirm the action name and tag against the repository history.
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      - name: Ensure weka mount is active
        run: |
          sudo systemctl restart mnt-MLPerf.mount
          sudo /etc/rc.local
          # Listing the directory fails the step early if the mount is absent.
          ls -al /mnt/MLPerf/bit_error_tests
      - name: Set up dynamic env vars for build
        run: |
          # Expose the workspace directory to all later steps as TT_METAL_HOME.
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
      - name: Build tt-metal and libs
        run: |
          ./scripts/build_scripts/build_with_profiler_opt.sh
          ./create_venv.sh
      - name: Run device performance regressions
        timeout-minutes: ${{ matrix.test-info.timeout }}
        run: |
          source python_env/bin/activate
          # Pipeline type is "<model-type>_device_performance_<machine-type>".
          ./tests/scripts/run_tests.sh \
            --tt-arch "$ARCH_NAME" \
            --pipeline-type ${{ matrix.test-info.model-type }}_device_performance_${{ matrix.test-info.machine-type }}
      - name: Check device perf report exists
        id: check-device-perf-report
        # Run even when the test step failed, but not when the run was cancelled.
        if: ${{ !cancelled() }}
        run: |
          ls -hal
          # NOTE(review): the filename uses the date at the time this step runs;
          # presumably the test run names its report the same way - verify the
          # two cannot disagree when a run crosses midnight.
          DEVICE_PERF_REPORT_FILENAME="Models_Device_Perf_$(date +%Y_%m_%d).csv"
          # A missing report makes `ls` exit non-zero, which fails this step
          # and therefore skips the upload below.
          ls -hal "$DEVICE_PERF_REPORT_FILENAME"
          echo "device_perf_report_filename=$DEVICE_PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
      - name: Upload device perf report
        if: ${{ !cancelled() && steps.check-device-perf-report.conclusion == 'success' }}
        uses: actions/upload-artifact@v4
        with:
          # arch + machine-type + model-type keeps artifact names distinct
          # across the matrix entries defined above.
          name: device-perf-report-csv-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }}-${{ matrix.test-info.model-type }}
          path: "${{ steps.check-device-perf-report.outputs.device_perf_report_filename }}"