.github/workflows/fast-dispatch-full-regressions-and-models.yaml

# Display name of this workflow.
name: Nightly fast dispatch tests

# Triggers: a cron schedule, invocation as a reusable workflow from another
# workflow, and manual dispatch.
on:
  schedule:
    # Minute 0 of every second hour (GitHub evaluates cron in UTC).
    # NOTE(review): this fires every two hours although the workflow is named
    # "Nightly" - confirm the cadence is intended.
    - cron: '0 */2 * * *'
  workflow_call:
  workflow_dispatch:

jobs:
  # Calls the repository's reusable build workflow, passing the caller's
  # secrets through. Both test jobs in this file declare
  # `needs: build-artifact`, so they start only after this job succeeds.
  build-artifact:
    secrets: inherit
    uses: ./.github/workflows/build-artifact.yaml
  # Fast-dispatch test matrix: one job per `test-group` entry. Each job
  # downloads the build artifact for its architecture, runs a single test
  # command under a per-entry timeout, and uploads the generated test reports.
  fd-nightly:
    needs: build-artifact
    strategy:
      # Do not fail-fast because we need to ensure all tests go to completion
      # so we try not to get hanging machines
      fail-fast: false
      matrix:
        # Fields of each entry:
        #   name    - label used in the job's display name
        #   arch    - exported as ARCH_NAME; also selects the build artifact
        #   runs-on - runner labels the job is scheduled on
        #   cmd     - command executed from $TT_METAL_HOME in the test step
        #   timeout - timeout-minutes applied to the test step
        test-group:
          - name: "Common models GS"
            arch: grayskull
            runs-on: ["cloud-virtual-machine", "E150", "in-service"]
            cmd: tests/scripts/single_card/nightly/run_common_models.sh
            timeout: 40
          - name: "GS ttnn nightly"
            arch: grayskull
            runs-on: ["cloud-virtual-machine", "E150", "in-service"]
            cmd: tests/scripts/single_card/nightly/run_ttnn.sh
            timeout: 40
          - name: "WH N150 ttnn nightly"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N150", "in-service"]
            cmd: tests/scripts/single_card/nightly/run_ttnn.sh
            timeout: 70
          - name: "WH N300 ttnn nightly"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N300", "in-service"]
            cmd: tests/scripts/single_card/nightly/run_ttnn.sh
            timeout: 70
          - name: "GS-only models"
            arch: grayskull
            runs-on: ["cloud-virtual-machine", "E150", "in-service"]
            cmd: tests/scripts/single_card/nightly/run_gs_only.sh
            timeout: 40
          - name: "API tests GS"
            arch: grayskull
            runs-on: ["cloud-virtual-machine", "E150", "in-service"]
            cmd: ./tests/scripts/run_tests.sh --tt-arch grayskull --pipeline-type frequent_api --dispatch-mode fast
            timeout: 10
          - name: "API tests N300 WH B0"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N300", "in-service"]
            cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast
            timeout: 10
          - name: "API tests N150 WH B0"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N150", "in-service"]
            cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast
            timeout: 10
          # Names starting with "[" must stay quoted, otherwise YAML would
          # parse them as flow sequences.
          - name: "[Unstable] N150 models"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N150", "in-service"]
            cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh
            timeout: 55
          - name: "[Unstable] N300 models"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N300", "in-service"]
            cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh
            timeout: 55
    name: FD ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }}
    env:
      ARCH_NAME: ${{ matrix.test-group.arch }}
      LOGURU_LEVEL: INFO
      # Presumably where the extracted build artifact places its shared
      # libraries - confirm against the build workflow.
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    runs-on: ${{ matrix.test-group.runs-on }}
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      # Runs mount_weka.sh through the repository's retry-command action; the
      # meaning of the inputs below is defined by that action.
      - uses: ./.github/actions/retry-command
        with:
          timeout-seconds: 100
          max-retries: 10
          backoff-seconds: 60
          command: ./.github/scripts/cloud_utils/mount_weka.sh
      # Export the step's working directory as TT_METAL_HOME for all
      # subsequent steps of this job.
      - name: Set up dynamic env vars for build
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
      - uses: actions/download-artifact@v4
        with:
          name: TTMetal_build_${{ matrix.test-group.arch }}
      - name: Extract files
        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
      - uses: ./.github/actions/install-python-deps
      - name: Run frequent reg tests scripts
        timeout-minutes: ${{ matrix.test-group.timeout }}
        run: |
          source "${{ github.workspace }}/python_env/bin/activate"
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          # Deliberately unquoted: cmd may carry arguments that must be
          # word-split by the shell.
          ${{ matrix.test-group.cmd }}
      # Upload reports even when the tests failed; skipped only if the run
      # was cancelled.
      - uses: ./.github/actions/upload-artifact-with-job-uuid
        if: ${{ !cancelled() }}
        with:
          path: |
            generated/test_reports/
          prefix: "test_reports_"
  # Nightly Wormhole B0 model tests: one job per (card, model) pair, each
  # running pytest on tests/nightly/single_card/<model>.
  nightly-wh-models:
    needs: build-artifact
    strategy:
      # Do not fail-fast because we need to ensure all tests go to completion
      # so we try not to get hanging machines
      fail-fast: false
      matrix:
        # card is used as a runner label; model is the test directory name
        # under tests/nightly/single_card/.
        card: [N150, N300]
        model:
          - common_models
          - functional_unet
          - llama31_8b
          - mamba
          - mistral7b
          - mistral7b_eth
          - resnet50
    name: Nightly ${{ matrix.card }} ${{ matrix.model }}
    env:
      ARCH_NAME: wormhole_b0
      LOGURU_LEVEL: INFO
      # Presumably where the extracted build artifact places its shared
      # libraries - confirm against the build workflow.
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    runs-on: ["cloud-virtual-machine", "in-service", "${{ matrix.card }}"]
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      # Runs mount_weka.sh through the repository's retry-command action; the
      # meaning of the inputs below is defined by that action.
      - uses: ./.github/actions/retry-command
        with:
          timeout-seconds: 100
          max-retries: 10
          backoff-seconds: 60
          command: ./.github/scripts/cloud_utils/mount_weka.sh
      # Export the step's working directory as TT_METAL_HOME for all
      # subsequent steps of this job.
      - name: Set up dynamic env vars for build
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
      # NOTE(review): this condition is true for every model except
      # 'mistral7b' (so also common_models, resnet50, ...), which is broader
      # than the step name's "eth-enabled models" suggests - confirm intended.
      - name: Set up WH_ARCH_YAML for eth-enabled models
        if: ${{ matrix.model != 'mistral7b' }}
        run: |
          echo "WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml" >> "$GITHUB_ENV"
      - uses: actions/download-artifact@v4
        with:
          name: TTMetal_build_wormhole_b0
      - name: Extract files
        run: tar -xvf ttm_wormhole_b0.tar
      - uses: ./.github/actions/install-python-deps
      - name: Run frequent reg tests scripts
        timeout-minutes: 50
        run: |
          source "${{ github.workspace }}/python_env/bin/activate"
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          pytest -n auto tests/nightly/single_card/${{ matrix.model }}
      # Upload reports even when the tests failed; skipped only if the run
      # was cancelled.
      - uses: ./.github/actions/upload-artifact-with-job-uuid
        if: ${{ !cancelled() }}
        with:
          path: |
            generated/test_reports/
          prefix: "test_reports_"