diff --git a/.github/workflows/fast-dispatch-full-regressions-and-models-impl.yaml b/.github/workflows/fast-dispatch-full-regressions-and-models-impl.yaml new file mode 100644 index 00000000000..03fc201433b --- /dev/null +++ b/.github/workflows/fast-dispatch-full-regressions-and-models-impl.yaml @@ -0,0 +1,169 @@ +name: "[internal] Nightly fast dispatch tests impl" + +on: + workflow_call: + +jobs: + fd-nightly: + strategy: + # Do not fail-fast because we need to ensure all tests go to completion + # so we try not to get hanging machines + fail-fast: false + matrix: + test-group: + [ + { + name: "Common models GS", + arch: grayskull, + runs-on: ["cloud-virtual-machine", "E150", "in-service"], + cmd: tests/scripts/single_card/nightly/run_common_models.sh, + timeout: 40 + }, + { + name: "GS ttnn nightly", + arch: grayskull, + runs-on: ["cloud-virtual-machine", "E150", "in-service"], + cmd: tests/scripts/single_card/nightly/run_ttnn.sh, + timeout: 40 + }, + { + name: "WH N150 ttnn nightly", + arch: wormhole_b0, + runs-on: ["cloud-virtual-machine", "N150", "in-service"], + cmd: tests/scripts/single_card/nightly/run_ttnn.sh, + timeout: 70 + }, + { + name: "WH N300 ttnn nightly", + arch: wormhole_b0, + runs-on: ["cloud-virtual-machine", "N300", "in-service"], + cmd: tests/scripts/single_card/nightly/run_ttnn.sh, + timeout: 70 + }, + { + name: "GS-only models", + arch: grayskull, + runs-on: ["cloud-virtual-machine", "E150", "in-service"], + cmd: tests/scripts/single_card/nightly/run_gs_only.sh, + timeout: 40 + }, + { + name: "API tests GS", + arch: grayskull, + runs-on: ["cloud-virtual-machine", "E150", "in-service"], + cmd: ./tests/scripts/run_tests.sh --tt-arch grayskull --pipeline-type frequent_api --dispatch-mode fast, + timeout: 10 + }, + { + name: "API tests N300 WH B0", + arch: wormhole_b0, + runs-on: ["cloud-virtual-machine", "N300", "in-service"], + cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast, + 
timeout: 10 + }, + { + name: "API tests N150 WH B0", + arch: wormhole_b0, + runs-on: ["cloud-virtual-machine", "N150", "in-service"], + cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast, + timeout: 10 + }, + { + name: "[Unstable] N150 models", + arch: wormhole_b0, + runs-on: ["cloud-virtual-machine", "N150", "in-service"], + cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh, + timeout: 55 + }, + { + name: "[Unstable] N300 models", + arch: wormhole_b0, + runs-on: ["cloud-virtual-machine", "N300", "in-service"], + cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh, + timeout: 55 + }, + ] + name: FD ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }} + env: + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + runs-on: ${{ matrix.test-group.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - uses: ./.github/actions/retry-command + with: + timeout-seconds: 100 + max-retries: 10 + backoff-seconds: 60 + command: ./.github/scripts/cloud_utils/mount_weka.sh + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run frequent reg tests scripts + timeout-minutes: ${{ matrix.test-group.timeout }} + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + ${{ matrix.test-group.cmd }} + - uses: ./.github/actions/upload-artifact-with-job-uuid + if: ${{ !cancelled() }} + with: + path: | + generated/test_reports/ + prefix: "test_reports_" + nightly-wh-models: + strategy: + # Do not fail-fast because we need to ensure all tests go to
completion + # so we try not to get hanging machines + fail-fast: false + matrix: + card: [N150, N300] + model: [common_models, functional_unet, llama31_8b, mamba, mistral7b, mistral7b_eth, resnet50] + name: Nightly ${{ matrix.card }} ${{ matrix.model }} + env: + ARCH_NAME: wormhole_b0 + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + runs-on: ["cloud-virtual-machine", "in-service", "${{ matrix.card }}"] + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - uses: ./.github/actions/retry-command + with: + timeout-seconds: 100 + max-retries: 10 + backoff-seconds: 60 + command: ./.github/scripts/cloud_utils/mount_weka.sh + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - name: Set up WH_ARCH_YAML for eth-enabled models + if: ${{ matrix.model != 'mistral7b' }} + run: | + echo "WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_wormhole_b0 + - name: Extract files + run: tar -xvf ttm_wormhole_b0.tar + - uses: ./.github/actions/install-python-deps + - name: Run frequent reg tests scripts + timeout-minutes: 50 + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + pytest -n auto tests/nightly/single_card/${{ matrix.model }} + - uses: ./.github/actions/upload-artifact-with-job-uuid + if: ${{ !cancelled() }} + with: + path: | + generated/test_reports/ + prefix: "test_reports_" diff --git a/.github/workflows/fast-dispatch-full-regressions-and-models.yaml b/.github/workflows/fast-dispatch-full-regressions-and-models.yaml index bbaae0cd777..409833993f4 100644 --- a/.github/workflows/fast-dispatch-full-regressions-and-models.yaml +++ b/.github/workflows/fast-dispatch-full-regressions-and-models.yaml @@ -12,166 +12,5 @@ jobs: secrets: inherit fd-nightly: needs: build-artifact - strategy: - # Do not
fail-fast because we need to ensure all tests go to completion - # so we try not to get hanging machines - fail-fast: false - matrix: - test-group: - [ - { - name: "Common models GS", - arch: grayskull, - runs-on: ["cloud-virtual-machine", "E150", "in-service"], - cmd: tests/scripts/single_card/nightly/run_common_models.sh, - timeout: 40 - }, - { - name: "GS ttnn nightly", - arch: grayskull, - runs-on: ["cloud-virtual-machine", "E150", "in-service"], - cmd: tests/scripts/single_card/nightly/run_ttnn.sh, - timeout: 40 - }, - { - name: "WH N150 ttnn nightly", - arch: wormhole_b0, - runs-on: ["cloud-virtual-machine", "N150", "in-service"], - cmd: tests/scripts/single_card/nightly/run_ttnn.sh, - timeout: 70 - }, - { - name: "WH N300 ttnn nightly", - arch: wormhole_b0, - runs-on: ["cloud-virtual-machine", "N300", "in-service"], - cmd: tests/scripts/single_card/nightly/run_ttnn.sh, - timeout: 70 - }, - { - name: "GS-only models", - arch: grayskull, - runs-on: ["cloud-virtual-machine", "E150", "in-service"], - cmd: tests/scripts/single_card/nightly/run_gs_only.sh, - timeout: 40 - }, - { - name: "API tests GS", - arch: grayskull, - runs-on: ["cloud-virtual-machine", "E150", "in-service"], - cmd: ./tests/scripts/run_tests.sh --tt-arch grayskull --pipeline-type frequent_api --dispatch-mode fast, - timeout: 10 - }, - { - name: "API tests N300 WH B0", - arch: wormhole_b0, - runs-on: ["cloud-virtual-machine", "N300", "in-service"], - cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast, - timeout: 10 - }, - { - name: "API tests N150 WH B0", - arch: wormhole_b0, - runs-on: ["cloud-virtual-machine", "N150", "in-service"], - cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast, - timeout: 10 - }, - { - name: "[Unstable] N150 models", - arch: wormhole_b0, - runs-on: ["cloud-virtual-machine", "N150", "in-service"], - cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh, 
- timeout: 55 - }, - { - name: "[Unstable] N300 models", - arch: wormhole_b0, - runs-on: ["cloud-virtual-machine", "N300", "in-service"], - cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh, - timeout: 55 - }, - ] - name: FD ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }} - env: - ARCH_NAME: ${{ matrix.test-group.arch }} - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - runs-on: ${{ matrix.test-group.runs-on }} - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - uses: ./.github/actions/retry-command - with: - timeout-seconds: 100 - max-retries: 10 - backoff-seconds: 60 - command: ./.github/scripts/cloud_utils/mount_weka.sh - - name: Set up dyanmic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_${{ matrix.test-group.arch }} - - name: Extract files - run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - - uses: ./.github/actions/install-python-deps - - name: Run frequent reg tests scripts - timeout-minutes: ${{ matrix.test-group.timeout }} - run: | - source ${{ github.workspace }}/python_env/bin/activate - cd $TT_METAL_HOME - export PYTHONPATH=$TT_METAL_HOME - ${{ matrix.test-group.cmd }} - - uses: ./.github/actions/upload-artifact-with-job-uuid - if: ${{ !cancelled() }} - with: - path: | - generated/test_reports/ - prefix: "test_reports_" - nightly-wh-models: - needs: build-artifact - strategy: - # Do not fail-fast because we need to ensure all tests go to completion - # so we try not to get hanging machines - fail-fast: false - matrix: - card: [N150, N300] - model: [common_models, functional_unet, llama31_8b, mamba, mistral7b, mistral7b_eth, resnet50] - name: Nightly ${{ matrix.card }} ${{ matrix.model }} - env: - ARCH_NAME: wormhole_b0 - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - runs-on: ["cloud-virtual-machine", "in-service", "${{ 
matrix.card }}"] - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - uses: ./.github/actions/retry-command - with: - timeout-seconds: 100 - max-retries: 10 - backoff-seconds: 60 - command: ./.github/scripts/cloud_utils/mount_weka.sh - - name: Set up dyanmic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - name: Set up WH_ARCH_YAML for eth-enabled models - if: ${{ matrix.model != 'mistral7b' }} - run: | - echo "WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_wormhole_b0 - - name: Extract files - run: tar -xvf ttm_wormhole_b0.tar - - uses: ./.github/actions/install-python-deps - - name: Run frequent reg tests scripts - timeout-minutes: 50 - run: | - source ${{ github.workspace }}/python_env/bin/activate - cd $TT_METAL_HOME - export PYTHONPATH=$TT_METAL_HOME - pytest -n auto tests/nightly/single_card/${{ matrix.model }} - - uses: ./.github/actions/upload-artifact-with-job-uuid - if: ${{ !cancelled() }} - with: - path: | - generated/test_reports/ - prefix: "test_reports_" + uses: ./.github/workflows/fast-dispatch-full-regressions-and-models-impl.yaml + secrets: inherit diff --git a/.github/workflows/perf-device-models-impl.yaml b/.github/workflows/perf-device-models-impl.yaml index 26257731c45..a2a0f7b2782 100644 --- a/.github/workflows/perf-device-models-impl.yaml +++ b/.github/workflows/perf-device-models-impl.yaml @@ -1,4 +1,4 @@ -name: "(internal) Single-card Device perf regressions impl" +name: "[internal] Single-card Device perf regressions impl" on: workflow_call: diff --git a/.github/workflows/perf-models-impl.yaml b/.github/workflows/perf-models-impl.yaml new file mode 100644 index 00000000000..d44cc99e715 --- /dev/null +++ b/.github/workflows/perf-models-impl.yaml @@ -0,0 +1,74 @@ +name: "[internal] Perf models impl" + +on: + workflow_call: + +jobs: + models-perf: + strategy: + # Do not 
fail-fast because we need to ensure all tests go to completion + # so we try not to get hanging machines + fail-fast: false + matrix: + test-info: [ + {name: "GS", arch: grayskull, runs-on: ["E150", "pipeline-perf", "bare-metal", "in-service"], machine-type: "bare_metal"}, + {name: "N300 WH B0", arch: wormhole_b0, runs-on: ["N300", "pipeline-perf", "bare-metal", "in-service"], machine-type: "bare_metal"}, + ] + model-type: [llm_javelin, cnn_javelin, other] + name: "${{ matrix.model-type }} ${{ matrix.test-info.name }}" + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-info.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ${{ matrix.test-info.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Enable Performance mode + run: | + sudo cpupower frequency-set -g performance + - name: Ensure weka mount is active + run: | + sudo systemctl restart mnt-MLPerf.mount + sudo /etc/rc.local + ls -al /mnt/MLPerf/bit_error_tests + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-info.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-info.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run performance regressions + id: performance_tests + timeout-minutes: 70 + run: | + source ${{ github.workspace }}/python_env/bin/activate + ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.model-type }}_models_performance_${{ matrix.test-info.machine-type }} + # TODO: Fix the pipeline before enabling notifications. 
+ #- uses: ./.github/actions/slack-report + # if: ${{ failure() }} + # with: + # slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} + - name: Check perf report exists + id: check-perf-report + if: ${{ !cancelled() }} + run: | + ls -hal + export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv + ls -hal $PERF_REPORT_FILENAME + echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT" + - name: Upload perf report + if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }} + uses: actions/upload-artifact@v4 + with: + name: perf-report-csv-${{ matrix.model-type }}-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }} + path: "${{ steps.check-perf-report.outputs.perf_report_filename }}" + - name: Disable Performance mode + if: always() + run: | + sudo cpupower frequency-set -g ondemand diff --git a/.github/workflows/perf-models.yaml b/.github/workflows/perf-models.yaml index 6d131a6c718..c7f0d9313a5 100644 --- a/.github/workflows/perf-models.yaml +++ b/.github/workflows/perf-models.yaml @@ -12,70 +12,5 @@ jobs: secrets: inherit models-perf: needs: build-artifact - strategy: - # Do not fail-fast because we need to ensure all tests go to completion - # so we try not to get hanging machines - fail-fast: false - matrix: - test-info: [ - {name: "GS", arch: grayskull, runs-on: ["E150", "pipeline-perf", "bare-metal", "in-service"], machine-type: "bare_metal"}, - {name: "N300 WH B0", arch: wormhole_b0, runs-on: ["N300", "pipeline-perf", "bare-metal", "in-service"], machine-type: "bare_metal"}, - ] - model-type: [llm_javelin, cnn_javelin, other] - name: "${{ matrix.model-type }} ${{ matrix.test-info.name }}" - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.test-info.arch }} - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - environment: dev - runs-on: ${{ matrix.test-info.runs-on }} - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 
- - name: Enable Performance mode - run: | - sudo cpupower frequency-set -g performance - - name: Ensure weka mount is active - run: | - sudo systemctl restart mnt-MLPerf.mount - sudo /etc/rc.local - ls -al /mnt/MLPerf/bit_error_tests - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_${{ matrix.test-info.arch }} - - name: Extract files - run: tar -xvf ttm_${{ matrix.test-info.arch }}.tar - - uses: ./.github/actions/install-python-deps - - name: Run performance regressions - id: performance_tests - timeout-minutes: 70 - run: | - source ${{ github.workspace }}/python_env/bin/activate - ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.model-type }}_models_performance_${{ matrix.test-info.machine-type }} - # TODO: Fix the pipeline before enabling notifications. - #- uses: ./.github/actions/slack-report - # if: ${{ failure() }} - # with: - # slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} - - name: Check perf report exists - id: check-perf-report - if: ${{ !cancelled() }} - run: | - ls -hal - export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv - ls -hal $PERF_REPORT_FILENAME - echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT" - - name: Upload perf report - if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }} - uses: actions/upload-artifact@v4 - with: - name: perf-report-csv-${{ matrix.model-type }}-${{ matrix.test-info.arch }}-${{ matrix.test-info.machine-type }} - path: "${{ steps.check-perf-report.outputs.perf_report_filename }}" - - name: Disable Performance mode - if: always() - run: | - sudo cpupower frequency-set -g ondemand + uses: ./.github/workflows/perf-models-impl.yaml + secrets: inherit diff --git a/.github/workflows/pipeline-select.yaml b/.github/workflows/pipeline-select.yaml new file mode 100644 index 
00000000000..e4b1e2bcfb3 --- /dev/null +++ b/.github/workflows/pipeline-select.yaml @@ -0,0 +1,76 @@ +name: "Choose your pipeline" + +on: + workflow_dispatch: + inputs: + build-type: + required: false + type: choice + options: + - Release + - Debug + - RelWithDebInfo + - CI + default: "Release" + # Free-text label used as the run name below; when left empty GitHub falls back to the workflow name. + description: + required: false + type: string + default: "" +run-name: ${{ inputs.description }} +jobs: + build-artifact: + uses: ./.github/workflows/build-artifact.yaml + with: + build-type: ${{ inputs.build-type }} + secrets: inherit + build-artifact-profiler: + uses: ./.github/workflows/build-artifact.yaml + with: + build-type: ${{ inputs.build-type }} + tracy: true + secrets: inherit + single-card-demo-tests: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/single-card-demo-tests-impl.yaml + perf-models-tests: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/perf-models-impl.yaml + perf-device-models-tests: + needs: build-artifact-profiler + secrets: inherit + uses: ./.github/workflows/perf-device-models-impl.yaml + fast-dispatch-full-regressions-and-models: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/fast-dispatch-full-regressions-and-models-impl.yaml + t3000-unit-tests: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/t3000-unit-tests-impl.yaml + t3000-demo-tests: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/t3000-demo-tests-impl.yaml + t3000-frequent-tests: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/t3000-frequent-tests-impl.yaml + t3000-nightly-tests: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/t3000-nightly-tests-impl.yaml + t3000-model-perf-tests: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/t3000-model-perf-tests-impl.yaml + tgg-unit-tests: + needs: build-artifact + secrets: inherit + uses: ./.github/workflows/tgg-unit-tests-impl.yaml + tg-unit-tests: + needs: build-artifact + secrets: inherit + uses:
./.github/workflows/tg-unit-tests-impl.yaml diff --git a/.github/workflows/single-card-demo-tests-impl.yaml b/.github/workflows/single-card-demo-tests-impl.yaml index f00a90af5ab..2421e7ffa25 100644 --- a/.github/workflows/single-card-demo-tests-impl.yaml +++ b/.github/workflows/single-card-demo-tests-impl.yaml @@ -1,4 +1,4 @@ -name: "(internal) Single-card Demo tests impl" +name: "[internal] Single-card Demo tests impl" on: workflow_call: diff --git a/.github/workflows/t3000-demo-tests-impl.yaml b/.github/workflows/t3000-demo-tests-impl.yaml index 8f3b2e66471..5aaedcfeeb5 100644 --- a/.github/workflows/t3000-demo-tests-impl.yaml +++ b/.github/workflows/t3000-demo-tests-impl.yaml @@ -1,4 +1,4 @@ -name: "(internal) T3000 demo tests impl" +name: "[internal] T3000 demo tests impl" on: workflow_call: diff --git a/.github/workflows/t3000-frequent-tests-impl.yaml b/.github/workflows/t3000-frequent-tests-impl.yaml new file mode 100644 index 00000000000..f72b823fb6d --- /dev/null +++ b/.github/workflows/t3000-frequent-tests-impl.yaml @@ -0,0 +1,52 @@ +name: "[internal] T3000 frequent tests impl" + +on: + workflow_call: + +jobs: + t3000-frequent-tests: + strategy: + fail-fast: false + matrix: + test-group: [ + { name: "t3k tteager tests", arch: wormhole_b0, cmd: run_t3000_tteager_tests, timeout: 60, owner_id: ULMEPM2MA}, #Sean Nijjar + { name: "t3k ethernet tests", arch: wormhole_b0, cmd: run_t3000_ethernet_tests, timeout: 60, owner_id: ULMEPM2MA}, #Sean Nijjar + { name: "t3k trace stress tests", arch: wormhole_b0, cmd: run_t3000_trace_stress_tests, timeout: 120, owner_id: U03NG0A5ND7}, #Aditya Saigal + { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 120, owner_id: U04S2UV6L8N}, #Sofija Jovic + { name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich + { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: 
U03PUAKE719}, #Miguel Tairum Cruz + { name: "t3k resnet tests", arch: wormhole_b0, cmd: run_t3000_resnet_tests, timeout: 30, owner_id: U013121KDH9}, #Austin Ho + ] + name: ${{ matrix.test-group.name }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-functional"] + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run frequent regression tests + shell: bash {0} + timeout-minutes: ${{ matrix.test-group.timeout }} + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_frequent_tests.sh + ${{ matrix.test-group.cmd }} + - uses: ./.github/actions/slack-report + if: ${{ failure() }} + with: + slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} + owner: ${{ matrix.test-group.owner_id }} diff --git a/.github/workflows/t3000-frequent-tests.yaml b/.github/workflows/t3000-frequent-tests.yaml index 8014b6d9f63..dd56ffe0aa1 100644 --- a/.github/workflows/t3000-frequent-tests.yaml +++ b/.github/workflows/t3000-frequent-tests.yaml @@ -13,48 +13,5 @@ jobs: secrets: inherit t3000-frequent-tests: needs: build-artifact - strategy: - fail-fast: false - matrix: - test-group: [ - { name: "t3k tteager tests", arch: wormhole_b0, cmd: run_t3000_tteager_tests, timeout: 60, owner_id: ULMEPM2MA}, #Sean Nijjar - { name: "t3k ethernet tests", arch: wormhole_b0, cmd: 
run_t3000_ethernet_tests, timeout: 60, owner_id: ULMEPM2MA}, #Sean Nijjar - { name: "t3k trace stress tests", arch: wormhole_b0, cmd: run_t3000_trace_stress_tests, timeout: 120, owner_id: U03NG0A5ND7}, #Aditya Saigal - { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 120, owner_id: U04S2UV6L8N}, #Sofija Jovic - { name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich - { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz - { name: "t3k resnet tests", arch: wormhole_b0, cmd: run_t3000_resnet_tests, timeout: 30, owner_id: U013121KDH9}, #Austin Ho - ] - name: ${{ matrix.test-group.name }} - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.test-group.arch }} - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - environment: dev - runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-functional"] - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_${{ matrix.test-group.arch }} - - name: Extract files - run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - - uses: ./.github/actions/install-python-deps - - name: Run frequent regression tests - shell: bash {0} - timeout-minutes: ${{ matrix.test-group.timeout }} - run: | - source ${{ github.workspace }}/python_env/bin/activate - cd $TT_METAL_HOME - export PYTHONPATH=$TT_METAL_HOME - source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_frequent_tests.sh - ${{ matrix.test-group.cmd }} - - uses: ./.github/actions/slack-report - if: ${{ failure() }} - with: - slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} - owner: ${{ 
matrix.test-group.owner_id }} + secrets: inherit + uses: ./.github/workflows/t3000-frequent-tests-impl.yaml diff --git a/.github/workflows/t3000-model-perf-tests-impl.yaml b/.github/workflows/t3000-model-perf-tests-impl.yaml new file mode 100644 index 00000000000..2d1a9fc0ca9 --- /dev/null +++ b/.github/workflows/t3000-model-perf-tests-impl.yaml @@ -0,0 +1,80 @@ +name: "[internal] T3000 model perf tests impl" + +on: + workflow_call: + +jobs: + + t3000-model-perf-tests: + strategy: + fail-fast: false + matrix: + test-group: [ + { name: "t3k LLM falcon7b model perf tests", model: "falcob7b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic + { name: "t3k LLM mixtral model perf tests", model: "mixtral", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 75, owner_id: U03PUAKE719}, # Miguel Tairum + { name: "t3k LLM llama2 model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 75, owner_id: U03FJB5TM5Y}, # Colman Glagovich + { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic + { name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho + #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run? 
+ ] + name: ${{ matrix.test-group.name }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"] + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Enable performance mode + run: | + sudo cpupower frequency-set -g performance + - name: Ensure weka mount is active + run: | + sudo systemctl restart mnt-MLPerf.mount + sudo /etc/rc.local + ls -al /mnt/MLPerf/bit_error_tests + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run model perf regression tests + shell: bash {0} + timeout-minutes: ${{ matrix.test-group.timeout }} + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_model_perf_tests.sh + ${{ matrix.test-group.cmd }} + env python models/perf/merge_perf_results.py + - name: Check perf report exists + id: check-perf-report + if: ${{ !cancelled() }} + run: | + ls -hal + export PERF_REPORT_FILENAME="Models_Perf_$(date +%Y_%m_%d).csv" + ls -hal $PERF_REPORT_FILENAME + echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT" + - name: Upload perf report + if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }} + uses: actions/upload-artifact@v4 + with: + name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.model }}-bare-metal + path: "${{ 
steps.check-perf-report.outputs.perf_report_filename }}" + - uses: ./.github/actions/slack-report + if: ${{ failure() }} + with: + slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} + owner: ${{ matrix.test-group.owner_id }} + - name: Disable performance mode + if: always() + run: | + sudo cpupower frequency-set -g ondemand diff --git a/.github/workflows/t3000-model-perf-tests.yaml b/.github/workflows/t3000-model-perf-tests.yaml index 3220d9a4782..0a8759af27c 100644 --- a/.github/workflows/t3000-model-perf-tests.yaml +++ b/.github/workflows/t3000-model-perf-tests.yaml @@ -13,75 +13,5 @@ jobs: secrets: inherit t3000-model-perf-tests: needs: build-artifact - strategy: - fail-fast: false - matrix: - test-group: [ - { name: "t3k LLM falcon7b model perf tests", model: "falcob7b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic - { name: "t3k LLM mixtral model perf tests", model: "mixtral", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 75, owner_id: U03PUAKE719}, # Miguel Tairum - { name: "t3k LLM llama2 model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 75, owner_id: U03FJB5TM5Y}, # Colman Glagovich - { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic - { name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho - #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run? 
- ] - name: ${{ matrix.test-group.name }} - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.test-group.arch }} - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - environment: dev - runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"] - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Enable performance mode - run: | - sudo cpupower frequency-set -g performance - - name: Ensure weka mount is active - run: | - sudo systemctl restart mnt-MLPerf.mount - sudo /etc/rc.local - ls -al /mnt/MLPerf/bit_error_tests - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_${{ matrix.test-group.arch }} - - name: Extract files - run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - - uses: ./.github/actions/install-python-deps - - name: Run model perf regression tests - shell: bash {0} - timeout-minutes: ${{ matrix.test-group.timeout }} - run: | - source ${{ github.workspace }}/python_env/bin/activate - cd $TT_METAL_HOME - export PYTHONPATH=$TT_METAL_HOME - source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_model_perf_tests.sh - ${{ matrix.test-group.cmd }} - env python models/perf/merge_perf_results.py - - name: Check perf report exists - id: check-perf-report - if: ${{ !cancelled() }} - run: | - ls -hal - export PERF_REPORT_FILENAME="Models_Perf_$(date +%Y_%m_%d).csv" - ls -hal $PERF_REPORT_FILENAME - echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT" - - name: Upload perf report - if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }} - uses: actions/upload-artifact@v4 - with: - name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.model }}-bare-metal - path: "${{ 
steps.check-perf-report.outputs.perf_report_filename }}" - - uses: ./.github/actions/slack-report - if: ${{ failure() }} - with: - slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} - owner: ${{ matrix.test-group.owner_id }} - - name: Disable performance mode - if: always() - run: | - sudo cpupower frequency-set -g ondemand + secrets: inherit + uses: ./.github/workflows/t3000-model-perf-tests-impl.yaml diff --git a/.github/workflows/t3000-nightly-tests-impl.yaml b/.github/workflows/t3000-nightly-tests-impl.yaml new file mode 100644 index 00000000000..ee82ca340d5 --- /dev/null +++ b/.github/workflows/t3000-nightly-tests-impl.yaml @@ -0,0 +1,47 @@ +name: "[internal] T3000 nightly tests impl" + +on: + workflow_call: + +jobs: + t3000-nightly-tests: + strategy: + fail-fast: false + matrix: + test-group: [ + { name: "t3k_ccl_tests", arch: wormhole_b0, cmd: run_t3000_ccl_tests, timeout: 180, owner_id: ULMEPM2MA}, # Sean Nijjar + ] + + name: ${{ matrix.test-group.name }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-functional"] + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run demo regression tests + shell: bash {0} + timeout-minutes: ${{ matrix.test-group.timeout }} + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_nightly_tests.sh + ${{ 
matrix.test-group.cmd }} + - uses: ./.github/actions/slack-report + if: ${{ failure() }} + with: + slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} + owner: ${{ matrix.test-group.owner_id }} diff --git a/.github/workflows/t3000-nightly-tests.yaml b/.github/workflows/t3000-nightly-tests.yaml index ce9ee839d37..58944fa282f 100644 --- a/.github/workflows/t3000-nightly-tests.yaml +++ b/.github/workflows/t3000-nightly-tests.yaml @@ -13,43 +13,5 @@ jobs: secrets: inherit t3000-nightly-tests: needs: build-artifact - strategy: - fail-fast: false - matrix: - test-group: [ - { name: "t3k_ccl_tests", arch: wormhole_b0, cmd: run_t3000_ccl_tests, timeout: 180, owner_id: ULMEPM2MA}, # Sean Nijjar - ] - - name: ${{ matrix.test-group.name }} - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.test-group.arch }} - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - environment: dev - runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-functional"] - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_${{ matrix.test-group.arch }} - - name: Extract files - run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - - uses: ./.github/actions/install-python-deps - - name: Run demo regression tests - shell: bash {0} - timeout-minutes: ${{ matrix.test-group.timeout }} - run: | - source ${{ github.workspace }}/python_env/bin/activate - cd $TT_METAL_HOME - export PYTHONPATH=$TT_METAL_HOME - source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_nightly_tests.sh - ${{ matrix.test-group.cmd }} - - uses: ./.github/actions/slack-report - if: ${{ failure() }} - with: - slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} - owner: ${{ matrix.test-group.owner_id }} + secrets: inherit + uses: 
./.github/workflows/t3000-nightly-tests-impl.yaml diff --git a/.github/workflows/t3000-unit-tests-impl.yaml b/.github/workflows/t3000-unit-tests-impl.yaml new file mode 100644 index 00000000000..a84c55120d0 --- /dev/null +++ b/.github/workflows/t3000-unit-tests-impl.yaml @@ -0,0 +1,52 @@ +name: "[internal] T3000 unit tests impl" + +on: + workflow_call: + +jobs: + t3000-unit-tests: + strategy: + fail-fast: false + matrix: + test-group: [ + { name: "t3k ttmetal tests", arch: wormhole_b0, cmd: run_t3000_ttmetal_tests, timeout: 30, owner_id: ULMEPM2MA}, #Sean Nijjar + { name: "t3k ttnn tests", arch: wormhole_b0, cmd: run_t3000_ttnn_tests, timeout: 120, owner_id: UBHPP2NDP}, #Joseph Chu + { name: "t3k falcon7b tests", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 30, owner_id: UBHPP2NDP}, #Joseph Chu + { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 30, owner_id: U053W15B6JF}, #Djordje Ivanovic + { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz + { name: "t3k grok tests", arch: wormhole_b0, cmd: run_t3000_grok_tests, timeout: 30, owner_id: U03HY7MK4BT}, #Mark O'Connor + { name: "t3k unet shallow tests", arch: wormhole_b0, cmd: run_t3000_unet_shallow_tests, timeout: 30, owner_id: U06ECNVR0EN}, #Evan Smal + ] + name: ${{ matrix.test-group.name }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-functional"] + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + 
run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run unit regression tests + shell: bash {0} + timeout-minutes: ${{ matrix.test-group.timeout }} + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_unit_tests.sh + ${{ matrix.test-group.cmd }} + - uses: ./.github/actions/slack-report + if: ${{ failure() }} + with: + slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} + owner: ${{ matrix.test-group.owner_id }} diff --git a/.github/workflows/t3000-unit-tests.yaml b/.github/workflows/t3000-unit-tests.yaml index f52a075aff5..c753e82c4ac 100644 --- a/.github/workflows/t3000-unit-tests.yaml +++ b/.github/workflows/t3000-unit-tests.yaml @@ -13,48 +13,5 @@ jobs: secrets: inherit t3000-unit-tests: needs: build-artifact - strategy: - fail-fast: false - matrix: - test-group: [ - { name: "t3k ttmetal tests", arch: wormhole_b0, cmd: run_t3000_ttmetal_tests, timeout: 30, owner_id: ULMEPM2MA}, #Sean Nijjar - { name: "t3k ttnn tests", arch: wormhole_b0, cmd: run_t3000_ttnn_tests, timeout: 120, owner_id: UBHPP2NDP}, #Joseph Chu - { name: "t3k falcon7b tests", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 30, owner_id: UBHPP2NDP}, #Joseph Chu - { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 30, owner_id: U053W15B6JF}, #Djordje Ivanovic - { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 30, owner_id: U03PUAKE719}, #Miguel Tairum Cruz - { name: "t3k grok tests", arch: wormhole_b0, cmd: run_t3000_grok_tests, timeout: 30, owner_id: U03HY7MK4BT}, #Mark O'Connor - { name: "t3k unet shallow tests", arch: wormhole_b0, cmd: run_t3000_unet_shallow_tests, timeout: 30, owner_id: U06ECNVR0EN}, #Evan Smal - ] - name: ${{ matrix.test-group.name }} - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ 
matrix.test-group.arch }} - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - environment: dev - runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-functional"] - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_${{ matrix.test-group.arch }} - - name: Extract files - run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - - uses: ./.github/actions/install-python-deps - - name: Run unit regression tests - shell: bash {0} - timeout-minutes: ${{ matrix.test-group.timeout }} - run: | - source ${{ github.workspace }}/python_env/bin/activate - cd $TT_METAL_HOME - export PYTHONPATH=$TT_METAL_HOME - source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_unit_tests.sh - ${{ matrix.test-group.cmd }} - - uses: ./.github/actions/slack-report - if: ${{ failure() }} - with: - slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} - owner: ${{ matrix.test-group.owner_id }} + secrets: inherit + uses: ./.github/workflows/t3000-unit-tests-impl.yaml diff --git a/.github/workflows/tg-unit-tests-impl.yaml b/.github/workflows/tg-unit-tests-impl.yaml new file mode 100644 index 00000000000..a4bb3ed4eee --- /dev/null +++ b/.github/workflows/tg-unit-tests-impl.yaml @@ -0,0 +1,77 @@ +name: "[internal] TG unit tests impl" + +on: + workflow_call: + +jobs: + TG-UMD-tests: + strategy: + fail-fast: false + matrix: + test-group: [ + { + name: "TG UMD unit tests", + arch: wormhole_b0, + runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-functional"], + cmd: "./build/test/umd/galaxy/unit_tests_glx" + }, + ] + name: ${{ matrix.test-group.name }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + environment: dev + runs-on: ${{ 
matrix.test-group.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - name: Build UMD device and tests + run: | + cmake -B build -G Ninja + cmake --build build --target umd_tests + - name: Run UMD unit regression tests + timeout-minutes: 10 + run: | + cd $TT_METAL_HOME + ${{ matrix.test-group.cmd }} + TG-tests: + strategy: + fail-fast: false + matrix: + test-group: [ + { + name: "TG unit tests", + arch: wormhole_b0, + runs-on: ["config-tg", "in-service", "bare-metal", "pipeline-functional"], + cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_tg_device --dispatch-mode ""' + }, + ] + name: ${{ matrix.test-group.name }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + environment: dev + runs-on: ${{ matrix.test-group.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run unit regression tests + timeout-minutes: 45 + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + ${{ matrix.test-group.cmd }} diff --git a/.github/workflows/tg-unit-tests.yaml b/.github/workflows/tg-unit-tests.yaml index 4c2190cd8db..0fe20e58ee6 100644 --- a/.github/workflows/tg-unit-tests.yaml +++ b/.github/workflows/tg-unit-tests.yaml @@ -12,74 +12,6 @@ jobs: arch: '["wormhole_b0"]' secrets: inherit TG-UMD-tests: - strategy: - 
fail-fast: false - matrix: - test-group: [ - { - name: "TG UMD unit tests", - arch: wormhole_b0, - runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-functional"], - cmd: "./build/test/umd/galaxy/unit_tests_glx" - }, - ] - name: ${{ matrix.test-group.name }} - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.test-group.arch }} - LOGURU_LEVEL: INFO - environment: dev - runs-on: ${{ matrix.test-group.runs-on }} - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - name: Build UMD device and tests - run: | - cmake -B build -G Ninja - cmake --build build --target umd_tests - - name: Run UMD unit regression tests - timeout-minutes: 10 - run: | - cd $TT_METAL_HOME - ${{ matrix.test-group.cmd }} - TG-tests: needs: build-artifact - strategy: - fail-fast: false - matrix: - test-group: [ - { - name: "TG unit tests", - arch: wormhole_b0, - runs-on: ["config-tg", "in-service", "bare-metal", "pipeline-functional"], - cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_tg_device --dispatch-mode ""' - }, - ] - name: ${{ matrix.test-group.name }} - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.test-group.arch }} - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - environment: dev - runs-on: ${{ matrix.test-group.runs-on }} - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_${{ matrix.test-group.arch }} - - name: Extract files - run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - - uses: ./.github/actions/install-python-deps - - name: Run unit regression tests - timeout-minutes: 45 - run: | - 
source ${{ github.workspace }}/python_env/bin/activate - cd $TT_METAL_HOME - export PYTHONPATH=$TT_METAL_HOME - ${{ matrix.test-group.cmd }} + secrets: inherit + uses: ./.github/workflows/tg-unit-tests-impl.yaml diff --git a/.github/workflows/tgg-unit-tests-impl.yaml b/.github/workflows/tgg-unit-tests-impl.yaml new file mode 100644 index 00000000000..88f980ccbfc --- /dev/null +++ b/.github/workflows/tgg-unit-tests-impl.yaml @@ -0,0 +1,43 @@ +name: "[internal] TGG unit tests impl" + +on: + workflow_call: + +jobs: + TGG-tests: + strategy: + fail-fast: false + matrix: + test-group: [ + { + name: "TGG unit tests", + arch: wormhole_b0, + runs-on: ["config-tgg", "in-service", "bare-metal", "pipeline-functional"], + cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_tgg_device --dispatch-mode ""' + }, + ] + name: ${{ matrix.test-group.name }} + env: + TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} + ARCH_NAME: ${{ matrix.test-group.arch }} + LOGURU_LEVEL: INFO + LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib + runs-on: ${{ matrix.test-group.runs-on }} + steps: + - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 + - name: Set up dynamic env vars for build + run: | + echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: TTMetal_build_${{ matrix.test-group.arch }} + - name: Extract files + run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar + - uses: ./.github/actions/install-python-deps + - name: Run unit regression tests + timeout-minutes: 45 + run: | + source ${{ github.workspace }}/python_env/bin/activate + cd $TT_METAL_HOME + export PYTHONPATH=$TT_METAL_HOME + ${{ matrix.test-group.cmd }} diff --git a/.github/workflows/tgg-unit-tests.yaml b/.github/workflows/tgg-unit-tests.yaml index 28e914574dd..f9be79c02f2 100644 --- a/.github/workflows/tgg-unit-tests.yaml +++ b/.github/workflows/tgg-unit-tests.yaml @@ -13,39 +13,5 @@ jobs: secrets: inherit TGG-tests: needs: 
build-artifact - strategy: - fail-fast: false - matrix: - test-group: [ - { - name: "TGG unit tests", - arch: wormhole_b0, - runs-on: ["config-tgg", "in-service", "bare-metal", "pipeline-functional"], - cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_tgg_device --dispatch-mode ""' - }, - ] - name: ${{ matrix.test-group.name }} - env: - TT_METAL_ENV: ${{ vars.TT_METAL_ENV }} - ARCH_NAME: ${{ matrix.test-group.arch }} - LOGURU_LEVEL: INFO - LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib - runs-on: ${{ matrix.test-group.runs-on }} - steps: - - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0 - - name: Set up dynamic env vars for build - run: | - echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - name: TTMetal_build_${{ matrix.test-group.arch }} - - name: Extract files - run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - - uses: ./.github/actions/install-python-deps - - name: Run unit regression tests - timeout-minutes: 45 - run: | - source ${{ github.workspace }}/python_env/bin/activate - cd $TT_METAL_HOME - export PYTHONPATH=$TT_METAL_HOME - ${{ matrix.test-group.cmd }} + secrets: inherit + uses: ./.github/workflows/tgg-unit-tests-impl.yaml