diff --git a/.github/workflows/pipeline-select-galaxy.yaml b/.github/workflows/pipeline-select-galaxy.yaml
new file mode 100644
index 00000000000..5e947a69bad
--- /dev/null
+++ b/.github/workflows/pipeline-select-galaxy.yaml
@@ -0,0 +1,89 @@
+# Manually dispatched selector for the Galaxy (TG/TGG) pipelines: one build job,
+# then each test workflow below runs only if its boolean input was ticked.
+name: "(TG/TGG) Choose your pipeline"
+
+on:
+  workflow_dispatch:
+    inputs:
+      description:
+        description: "Optional label shown as the run name"
+        required: false
+        type: string
+        default: ""
+      build-type:
+        required: false
+        type: choice
+        options:
+          - Release
+          - Debug
+          - RelWithDebInfo
+          - CI
+        default: "Release"
+      build-with-tracy:
+        required: false
+        type: boolean
+        default: false
+      tgg-unit:
+        required: false
+        type: boolean
+        default: false
+      tgg-frequent:
+        required: false
+        type: boolean
+        default: false
+      tgg-model-perf:
+        required: false
+        type: boolean
+        default: false
+      tg-unit:
+        required: false
+        type: boolean
+        default: false
+      tg-frequent:
+        required: false
+        type: boolean
+        default: false
+      tg-model-perf:
+        required: false
+        type: boolean
+        default: false
+
+# An empty description leaves GitHub to pick its default run name.
+run-name: ${{ inputs.description }}
+jobs:
+  build-artifact:
+    uses: ./.github/workflows/build-artifact.yaml
+    with:
+      build-type: ${{ inputs.build-type }}
+      tracy: ${{ inputs.build-with-tracy }}
+    secrets: inherit
+  tgg-unit-tests:
+    needs: build-artifact
+    secrets: inherit
+    uses: ./.github/workflows/tgg-unit-tests-impl.yaml
+    if: ${{ inputs.tgg-unit }}
+  tgg-frequent-tests:
+    needs: build-artifact
+    secrets: inherit
+    uses: ./.github/workflows/tgg-frequent-tests-impl.yaml
+    if: ${{ inputs.tgg-frequent }}
+  tgg-model-perf-tests:
+    needs: build-artifact
+    secrets: inherit
+    uses: ./.github/workflows/tgg-model-perf-tests-impl.yaml
+    if: ${{ inputs.tgg-model-perf }}
+  tg-unit-tests:
+    needs: build-artifact
+    secrets: inherit
+    uses: ./.github/workflows/tg-unit-tests-impl.yaml
+    if: ${{ inputs.tg-unit }}
+  tg-frequent-tests:
+    needs: build-artifact
+    secrets: inherit
+    uses: ./.github/workflows/tg-frequent-tests-impl.yaml
+    if: ${{ inputs.tg-frequent }}
+  tg-model-perf-tests:
+    needs: build-artifact
+    secrets: inherit
+    uses: ./.github/workflows/tg-model-perf-tests-impl.yaml
+    if: ${{ inputs.tg-model-perf }}
diff --git a/.github/workflows/pipeline-select-t3k.yaml b/.github/workflows/pipeline-select-t3k.yaml
index ddd2947319c..3df726bdd6e 100644
--- a/.github/workflows/pipeline-select-t3k.yaml
+++ b/.github/workflows/pipeline-select-t3k.yaml
@@ -36,6 +36,11 @@ on:
         required: false
         type: boolean
         default: false
+      t3000-profiler:
+        description: "T3000 profiler tests (requires tracy build)"
+        required: false
+        type: boolean
+        default: false
 
 run-name: ${{ inputs.description }}
 jobs:
@@ -70,3 +75,8 @@ jobs:
     secrets: inherit
     uses: ./.github/workflows/t3000-model-perf-tests-impl.yaml
     if: ${{ inputs.t3000-model-perf }}
+  t3000-profiler-tests:
+    needs: build-artifact
+    secrets: inherit
+    uses: ./.github/workflows/t3000-profiler-tests-impl.yaml
+    if: ${{ inputs.t3000-profiler }}
diff --git a/.github/workflows/pipeline-select.yaml b/.github/workflows/pipeline-select.yaml
index 8f991bb0c5c..d3d575e1191 100644
--- a/.github/workflows/pipeline-select.yaml
+++ b/.github/workflows/pipeline-select.yaml
@@ -1,4 +1,4 @@
-name: "(Single/TG/TGG) Choose your pipeline"
+name: "(Single) Choose your pipeline"
 
 on:
   workflow_dispatch:
@@ -33,22 +33,6 @@ on:
         required: false
         type: boolean
         default: false
-      tgg-unit:
-        required: false
-        type: boolean
-        default: false
-      tgg-frequent:
-        required: false
-        type: boolean
-        default: false
-      tg-unit:
-        required: false
-        type: boolean
-        default: false
-      tg-frequent:
-        required: false
-        type: boolean
-        default: false
 
 run-name: ${{ inputs.description }}
 jobs:
@@ -78,23 +62,3 @@ jobs:
     secrets: inherit
     uses: ./.github/workflows/fast-dispatch-full-regressions-and-models-impl.yaml
     if: ${{ inputs.single-card-nightly }}
-  tgg-unit-tests:
-    needs: build-artifact
-    secrets: inherit
-    uses: ./.github/workflows/tgg-unit-tests-impl.yaml
-    if: ${{ inputs.tgg-unit }}
-  tgg-frequent-tests:
-    needs: build-artifact
-    secrets: inherit
-    uses: ./.github/workflows/tgg-frequent-tests-impl.yaml
-    if: ${{ inputs.tgg-frequent }}
-  tg-unit-tests:
-    needs: build-artifact
-    secrets: inherit
-    uses: ./.github/workflows/tg-unit-tests-impl.yaml
-    if: ${{ inputs.tg-unit }}
-  tg-frequent-tests:
-    needs: build-artifact
-    secrets: inherit
-    uses: ./.github/workflows/tg-frequent-tests-impl.yaml
-    if: ${{ inputs.tg-frequent }}
diff --git a/.github/workflows/t3000-profiler-tests-impl.yaml b/.github/workflows/t3000-profiler-tests-impl.yaml
new file mode 100644
index 00000000000..571ac1628e3
--- /dev/null
+++ b/.github/workflows/t3000-profiler-tests-impl.yaml
@@ -0,0 +1,48 @@
+# Reusable T3000 profiler job (workflow_call only). It downloads the
+# TTMetal_build_<arch>_profiler artifact, so it expects the calling run to provide it.
+name: "[internal] T3000 profiler tests impl"
+
+on:
+  workflow_call:
+
+jobs:
+  t3000-profiler-tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        test-group: [
+          {
+            name: "T3000 profiler tests",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"],
+            cmd: './tests/scripts/run_profiler_regressions.sh'
+          },
+        ]
+    name: ${{ matrix.test-group.name }}
+    env:
+      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
+      ARCH_NAME: ${{ matrix.test-group.arch }}
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+    environment: dev
+    runs-on: ${{ matrix.test-group.runs-on }}
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
+      - name: Set up dynamic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
+      - uses: actions/download-artifact@v4
+        with:
+          name: TTMetal_build_${{ matrix.test-group.arch }}_profiler
+      - name: Extract files
+        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
+      - uses: ./.github/actions/install-python-deps
+      - name: Run profiler regression tests
+        timeout-minutes: 30
+        run: |
+          ./tests/scripts/run_profiler_regressions.sh
+      - uses: ./.github/actions/slack-report
+        if: ${{ failure() }}
+        with:
+          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
+          owner: U03BJ1L3LUQ # Mo Memarian
diff --git a/.github/workflows/t3000-profiler-tests.yaml b/.github/workflows/t3000-profiler-tests.yaml
index b63ecedf213..ccc9dda2876 100644
--- a/.github/workflows/t3000-profiler-tests.yaml
+++ b/.github/workflows/t3000-profiler-tests.yaml
@@ -15,42 +15,5 @@ jobs:
     secrets: inherit
   t3000-profiler-tests:
     needs: build-artifact-profiler
-    strategy:
-      fail-fast: false
-      matrix:
-        test-group: [
-          {
-            name: "T3000 profiler tests",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"],
-            cmd: './tests/scripts/run_profiler_regressions.sh'
-          },
-        ]
-    name: ${{ matrix.test-group.name }}
-    env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
-      ARCH_NAME: ${{ matrix.test-group.arch }}
-      LOGURU_LEVEL: INFO
-      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
-    environment: dev
-    runs-on: ${{ matrix.test-group.runs-on }}
-    steps:
-      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
-      - name: Set up dynamic env vars for build
-        run: |
-          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
-        with:
-          name: TTMetal_build_${{ matrix.test-group.arch }}_profiler
-      - name: Extract files
-        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
-      - uses: ./.github/actions/install-python-deps
-      - name: Run profiler regression tests
-        timeout-minutes: 30
-        run: |
-          ./tests/scripts/run_profiler_regressions.sh
-      - uses: ./.github/actions/slack-report
-        if: ${{ failure() }}
-        with:
-          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
-          owner: U03BJ1L3LUQ # Mo Memarian
+    secrets: inherit
+    uses: ./.github/workflows/t3000-profiler-tests-impl.yaml
diff --git a/.github/workflows/tg-model-perf-tests-impl.yaml b/.github/workflows/tg-model-perf-tests-impl.yaml
new file mode 100644
index 00000000000..dd10b6109a9
--- /dev/null
+++ b/.github/workflows/tg-model-perf-tests-impl.yaml
@@ -0,0 +1,85 @@
+# Reusable TG model perf job (workflow_call only). It downloads the
+# TTMetal_build_<arch> artifact, so it expects the calling run to provide it.
+name: "[internal] TG model perf tests impl"
+
+on:
+  workflow_call:
+
+jobs:
+  tg-model-perf-tests:
+    strategy:
+      fail-fast: false
+      # machine-type feeds the perf-report artifact name so it stays distinct from
+      # the TGG impl's artifacts when both impls run in the same workflow run.
+      matrix:
+        test-group: [
+          {
+            name: "TG LLM model perf tests",
+            model-type: "LLM",
+            machine-type: "TG",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
+            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_tg_device --dispatch-mode ""'
+          },
+          {
+            name: "TG CNN model perf tests",
+            model-type: "CNN",
+            machine-type: "TG",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
+            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tg_device --dispatch-mode ""'
+          },
+        ]
+    name: ${{ matrix.test-group.name }}
+    env:
+      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
+      ARCH_NAME: ${{ matrix.test-group.arch }}
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+    environment: dev
+    runs-on: ${{ matrix.test-group.runs-on }}
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
+      - name: Enable performance mode
+        run: |
+          sudo cpupower frequency-set -g performance
+      - name: Ensure weka mount is active
+        run: |
+          sudo systemctl restart mnt-MLPerf.mount
+          sudo /etc/rc.local
+          ls -al /mnt/MLPerf/bit_error_tests
+      - name: Set up dynamic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
+          echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
+      - uses: actions/download-artifact@v4
+        with:
+          name: TTMetal_build_${{ matrix.test-group.arch }}
+      - name: Extract files
+        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
+      - uses: ./.github/actions/install-python-deps
+      - name: Run model perf regression tests
+        timeout-minutes: 60
+        run: |
+          source ${{ github.workspace }}/python_env/bin/activate
+          cd $TT_METAL_HOME
+          export PYTHONPATH=$TT_METAL_HOME
+          ${{ matrix.test-group.cmd }}
+      - name: Check perf report exists
+        id: check-perf-report
+        if: ${{ !cancelled() }}
+        run: |
+          ls -hal
+          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
+          ls -hal $PERF_REPORT_FILENAME
+          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
+      - name: Upload perf report
+        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.machine-type }}
+          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
+      - name: Disable performance mode
+        if: always()
+        run: |
+          sudo cpupower frequency-set -g ondemand
diff --git a/.github/workflows/tg-model-perf-tests.yaml b/.github/workflows/tg-model-perf-tests.yaml
index 0dd1580e371..a813b763602 100644
--- a/.github/workflows/tg-model-perf-tests.yaml
+++ b/.github/workflows/tg-model-perf-tests.yaml
@@ -13,75 +13,5 @@ jobs:
     secrets: inherit
   tg-model-perf-tests:
     needs: build-artifact
-    strategy:
-      fail-fast: false
-      matrix:
-        test-group: [
-          {
-            name: "TG LLM model perf tests",
-            model-type: "LLM",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
-            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_tg_device --dispatch-mode ""'
-          },
-          {
-            name: "TG CNN model perf tests",
-            model-type: "CNN",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-perf"],
-            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tg_device --dispatch-mode ""'
-          },
-        ]
-    name: ${{ matrix.test-group.name }}
-    env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
-      ARCH_NAME: ${{ matrix.test-group.arch }}
-      LOGURU_LEVEL: INFO
-      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
-    environment: dev
-    runs-on: ${{ matrix.test-group.runs-on }}
-    steps:
-      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
-      - name: Enable performance mode
-        run: |
-          sudo cpupower frequency-set -g performance
-      - name: Ensure weka mount is active
-        run: |
-          sudo systemctl restart mnt-MLPerf.mount
-          sudo /etc/rc.local
-          ls -al /mnt/MLPerf/bit_error_tests
-      - name: Set up dynamic env vars for build
-        run: |
-          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
-          echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
-        with:
-          name: TTMetal_build_${{ matrix.test-group.arch }}
-      - name: Extract files
-        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
-      - uses: ./.github/actions/install-python-deps
-      - name: Run model perf regression tests
-        timeout-minutes: 60
-        run: |
-          source ${{ github.workspace }}/python_env/bin/activate
-          cd $TT_METAL_HOME
-          export PYTHONPATH=$TT_METAL_HOME
-          ${{ matrix.test-group.cmd }}
-      - name: Check perf report exists
-        id: check-perf-report
-        if: ${{ !cancelled() }}
-        run: |
-          ls -hal
-          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
-          ls -hal $PERF_REPORT_FILENAME
-          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
-      - name: Upload perf report
-        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.machine-type }}
-          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
-      - name: Disable performance mode
-        if: always()
-        run: |
-          sudo cpupower frequency-set -g ondemand
+    secrets: inherit
+    uses: ./.github/workflows/tg-model-perf-tests-impl.yaml
diff --git a/.github/workflows/tgg-model-perf-tests-impl.yaml b/.github/workflows/tgg-model-perf-tests-impl.yaml
new file mode 100644
index 00000000000..f3d44f2e2ba
--- /dev/null
+++ b/.github/workflows/tgg-model-perf-tests-impl.yaml
@@ -0,0 +1,85 @@
+# Reusable TGG model perf job (workflow_call only). It downloads the
+# TTMetal_build_<arch> artifact, so it expects the calling run to provide it.
+name: "[internal] TGG model perf tests impl"
+
+on:
+  workflow_call:
+
+jobs:
+  tgg-model-perf-tests:
+    strategy:
+      fail-fast: false
+      # machine-type feeds the perf-report artifact name so it stays distinct from
+      # the TG impl's artifacts when both impls run in the same workflow run.
+      matrix:
+        test-group: [
+          {
+            name: "TGG LLM model perf tests",
+            model-type: "LLM",
+            machine-type: "TGG",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-tgg", "in-service", "bare-metal", "pipeline-perf"],
+            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_tgg_device --dispatch-mode ""'
+          },
+          {
+            name: "TGG CNN model perf tests",
+            model-type: "CNN",
+            machine-type: "TGG",
+            arch: wormhole_b0,
+            runs-on: ["arch-wormhole_b0", "config-tgg", "in-service", "bare-metal", "pipeline-perf"],
+            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tgg_device --dispatch-mode ""'
+          },
+        ]
+    name: ${{ matrix.test-group.name }}
+    env:
+      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
+      ARCH_NAME: ${{ matrix.test-group.arch }}
+      LOGURU_LEVEL: INFO
+      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
+    environment: dev
+    runs-on: ${{ matrix.test-group.runs-on }}
+    steps:
+      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
+      - name: Enable performance mode
+        run: |
+          sudo cpupower frequency-set -g performance
+      - name: Ensure weka mount is active
+        run: |
+          sudo systemctl restart mnt-MLPerf.mount
+          sudo /etc/rc.local
+          ls -al /mnt/MLPerf/bit_error_tests
+      - name: Set up dynamic env vars for build
+        run: |
+          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
+          echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
+      - uses: actions/download-artifact@v4
+        with:
+          name: TTMetal_build_${{ matrix.test-group.arch }}
+      - name: Extract files
+        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
+      - uses: ./.github/actions/install-python-deps
+      - name: Run model perf regression tests
+        timeout-minutes: 60
+        run: |
+          source ${{ github.workspace }}/python_env/bin/activate
+          cd $TT_METAL_HOME
+          export PYTHONPATH=$TT_METAL_HOME
+          ${{ matrix.test-group.cmd }}
+      - name: Check perf report exists
+        id: check-perf-report
+        if: ${{ !cancelled() }}
+        run: |
+          ls -hal
+          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
+          ls -hal $PERF_REPORT_FILENAME
+          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
+      - name: Upload perf report
+        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.machine-type }}
+          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
+      - name: Disable performance mode
+        if: always()
+        run: |
+          sudo cpupower frequency-set -g ondemand
diff --git a/.github/workflows/tgg-model-perf-tests.yaml b/.github/workflows/tgg-model-perf-tests.yaml
index 259fb3fa7b7..c65fc7408d6 100644
--- a/.github/workflows/tgg-model-perf-tests.yaml
+++ b/.github/workflows/tgg-model-perf-tests.yaml
@@ -13,75 +13,5 @@ jobs:
     secrets: inherit
   tgg-model-perf-tests:
     needs: build-artifact
-    strategy:
-      fail-fast: false
-      matrix:
-        test-group: [
-          {
-            name: "TGG LLM model perf tests",
-            model-type: "LLM",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-tgg", "in-service", "bare-metal", "pipeline-perf"],
-            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_tgg_device --dispatch-mode ""'
-          },
-          {
-            name: "TGG CNN model perf tests",
-            model-type: "CNN",
-            arch: wormhole_b0,
-            runs-on: ["arch-wormhole_b0", "config-tgg", "in-service", "bare-metal", "pipeline-perf"],
-            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_tgg_device --dispatch-mode ""'
-          },
-        ]
-    name: ${{ matrix.test-group.name }}
-    env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
-      ARCH_NAME: ${{ matrix.test-group.arch }}
-      LOGURU_LEVEL: INFO
-      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
-    environment: dev
-    runs-on: ${{ matrix.test-group.runs-on }}
-    steps:
-      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
-      - name: Enable performance mode
-        run: |
-          sudo cpupower frequency-set -g performance
-      - name: Ensure weka mount is active
-        run: |
-          sudo systemctl restart mnt-MLPerf.mount
-          sudo /etc/rc.local
-          ls -al /mnt/MLPerf/bit_error_tests
-      - name: Set up dynamic env vars for build
-        run: |
-          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
-          echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
-        with:
-          name: TTMetal_build_${{ matrix.test-group.arch }}
-      - name: Extract files
-        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
-      - uses: ./.github/actions/install-python-deps
-      - name: Run model perf regression tests
-        timeout-minutes: 60
-        run: |
-          source ${{ github.workspace }}/python_env/bin/activate
-          cd $TT_METAL_HOME
-          export PYTHONPATH=$TT_METAL_HOME
-          ${{ matrix.test-group.cmd }}
-      - name: Check perf report exists
-        id: check-perf-report
-        if: ${{ !cancelled() }}
-        run: |
-          ls -hal
-          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
-          ls -hal $PERF_REPORT_FILENAME
-          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
-      - name: Upload perf report
-        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.machine-type }}
-          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
-      - name: Disable performance mode
-        if: always()
-        run: |
-          sudo cpupower frequency-set -g ondemand
+    secrets: inherit
+    uses: ./.github/workflows/tgg-model-perf-tests-impl.yaml