From 52dec89969b70ca6de776f7e42e2c7d61f3cc42c Mon Sep 17 00:00:00 2001
From: Andrew Fuller
Date: Tue, 10 Dec 2024 17:12:25 -0500
Subject: [PATCH] Dockerize BH pipeline (#15523)

### Ticket
Progress towards #14393

### Problem description
To run our pipeline on 22.04, we need to first dockerize the steps (else we need a whole new fleet of build runners).

### What's changed
Run the Blackhole post-commit within Docker. Still using 20.04, but sets the stage for 22.04.

### Checklist
- [ ] Post commit CI passes https://github.com/tenstorrent/tt-metal/actions/runs/12263532830
- [ ] Blackhole Post commit (if applicable) https://github.com/tenstorrent/tt-metal/actions/runs/12263536331
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
---
Review notes (text between the "---" marker and the diffstat is not part of
the commit message):

* Purpose: adds an optional `os` input (default "ubuntu-20.04") to three
  reusable workflows and moves their test steps from a host `run:` script to
  the ./.github/actions/docker-run action, passing the former job-level env
  vars through `docker_opts`.
* build-and-unit-tests.yaml: the editable install is invoked as
  `python3 -m pip` so it targets the same interpreter as the requirements
  install on the line before it, matching cpp-post-commit.yaml.
* build-and-unit-tests.yaml: TT_METAL_ENV is removed from the job env and is
  not forwarded to the container.
  NOTE(review): confirm nothing run by tests/scripts/run_tests.sh reads it.
* cpp-post-commit.yaml: the removed step ran `cd $TT_METAL_HOME` and exported
  PYTHONPATH=$TT_METAL_HOME; the new `docker_opts` forward neither.
  NOTE(review): presumably covered by the docker-run action and the editable
  install - confirm.
* blackhole-post-commit.yaml passes "ubuntu-20.04-amd64" to
  build-artifact.yaml but "ubuntu-20.04" to the test workflows, which append
  "-amd64" themselves when building `docker_os_arch`.
  NOTE(review): confirm build-artifact.yaml expects the suffixed form.
* Leading whitespace of the YAML lines below was reconstructed from workflow
  structure; verify against the target files before applying.

 .../workflows/all-post-commit-workflows.yaml  |  1 +
 .github/workflows/blackhole-post-commit.yaml  |  4 +++
 .github/workflows/build-and-unit-tests.yaml   | 33 +++++++++++++------
 .github/workflows/cpp-post-commit.yaml        | 25 +++++++++++---
 .../fast-dispatch-build-and-unit-tests.yaml   | 11 ++++++-
 5 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/all-post-commit-workflows.yaml b/.github/workflows/all-post-commit-workflows.yaml
index 57bae427f2f..82b5feb14aa 100644
--- a/.github/workflows/all-post-commit-workflows.yaml
+++ b/.github/workflows/all-post-commit-workflows.yaml
@@ -108,6 +108,7 @@ jobs:
         ]
     uses: ./.github/workflows/fast-dispatch-build-and-unit-tests.yaml
     with:
+      os: ubuntu-20.04
       arch: ${{ matrix.test-group.arch }}
       runner-label: ${{ matrix.test-group.runner-label }}
   # TTNN FD Unit tests
diff --git a/.github/workflows/blackhole-post-commit.yaml b/.github/workflows/blackhole-post-commit.yaml
index 8a09f14ecb2..ba479a8b63c 100644
--- a/.github/workflows/blackhole-post-commit.yaml
+++ b/.github/workflows/blackhole-post-commit.yaml
@@ -29,6 +29,7 @@ jobs:
     uses: ./.github/workflows/build-artifact.yaml
     secrets: inherit
     with:
+      os: "ubuntu-20.04-amd64"
       arch: '["blackhole"]'
       build-docker: false
   build-wheels:
@@ -57,6 +58,7 @@ jobs:
       arch: blackhole
       runner-label: BH
       timeout: 30
+      os: "ubuntu-20.04"
   fd-unit-tests:
     needs: build-wheels
     uses: ./.github/workflows/fast-dispatch-build-and-unit-tests.yaml
@@ -64,6 +66,7 @@ jobs:
     with:
       arch: blackhole
       runner-label: BH
+      os: "ubuntu-20.04"
   # FD C++ Unit Tests
   cpp-unit-tests:
     needs: build-artifact
@@ -73,6 +76,7 @@ jobs:
       arch: blackhole
       runner-label: BH
       timeout: 60
+      os: "ubuntu-20.04"
 
   # profiler-regression:
   #   needs: build-artifact-profiler
diff --git a/.github/workflows/build-and-unit-tests.yaml b/.github/workflows/build-and-unit-tests.yaml
index c17c38a4eb1..e51dced1890 100644
--- a/.github/workflows/build-and-unit-tests.yaml
+++ b/.github/workflows/build-and-unit-tests.yaml
@@ -13,6 +13,10 @@ on:
         required: false
         type: number
         default: 35
+      os:
+        required: false
+        type: string
+        default: "ubuntu-20.04"
   workflow_dispatch:
     inputs:
       arch:
@@ -34,6 +38,11 @@ on:
         required: false
         type: number
         default: 35
+      os:
+        required: false
+        type: string
+        default: "ubuntu-20.04"
+
 jobs:
   unit-tests-slow-dispatch:
     name: ${{ inputs.arch }} ${{ inputs.runner-label }}
@@ -42,24 +51,28 @@ jobs:
       - cloud-virtual-machine
       - in-service
     env:
-      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
-      ARCH_NAME: ${{ inputs.arch}}
-      TT_METAL_SLOW_DISPATCH_MODE: 1
+      ARCH_NAME: ${{ inputs.arch }}
       LOGURU_LEVEL: INFO
-      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
     steps:
       - uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
       - uses: ./.github/actions/prepare-metal-run
         with:
           arch: ${{ inputs.arch }}
-      - name: Set up dynamic env vars for build
-        run: |
-          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
       - name: Run pre/post regression tests
         timeout-minutes: ${{ inputs.timeout }}
-        run: |
-          source ${{ github.workspace }}/python_env/bin/activate
-          ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type post_commit --dispatch-mode slow
+        uses: ./.github/actions/docker-run
+        with:
+          docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
+          docker_password: ${{ secrets.GITHUB_TOKEN }}
+          docker_opts: |
+            -e ARCH_NAME=${{ inputs.arch }}
+            -e TT_METAL_HOME=${{ github.workspace }}
+            -e TT_METAL_SLOW_DISPATCH_MODE=1
+            -e LD_LIBRARY_PATH=${{ github.workspace }}/build/lib
+          run_args: |
+            python3 -m pip install -r $(pwd)/tt_metal/python_env/requirements-dev.txt
+            python3 -m pip install -e .
+            ./tests/scripts/run_tests.sh --tt-arch ${{ inputs.arch }} --pipeline-type post_commit --dispatch-mode slow
       - uses: ./.github/actions/slack-report
         if: ${{ failure() }}
         with:
diff --git a/.github/workflows/cpp-post-commit.yaml b/.github/workflows/cpp-post-commit.yaml
index ef34c142234..c90c623cb76 100644
--- a/.github/workflows/cpp-post-commit.yaml
+++ b/.github/workflows/cpp-post-commit.yaml
@@ -13,6 +13,10 @@ on:
         required: false
         type: number
         default: 80
+      os:
+        required: false
+        type: string
+        default: "ubuntu-20.04"
   workflow_dispatch:
     inputs:
       arch:
@@ -34,6 +38,10 @@ on:
         required: false
         type: number
         default: 60
+      os:
+        required: false
+        type: string
+        default: "ubuntu-20.04"
 
 jobs:
   models:
@@ -67,11 +75,18 @@ jobs:
           arch: ${{ inputs.arch }}
       - name: ${{ matrix.test-group.name }} tests
         timeout-minutes: ${{ inputs.timeout }}
-        run: |
-          source ${{ github.workspace }}/python_env/bin/activate
-          cd $TT_METAL_HOME
-          export PYTHONPATH=$TT_METAL_HOME
-          ${{ matrix.test-group.cmd }}
+        uses: ./.github/actions/docker-run
+        with:
+          docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
+          docker_password: ${{ secrets.GITHUB_TOKEN }}
+          docker_opts: |
+            -e TT_METAL_HOME=${{ github.workspace }}
+            -e ARCH_NAME=${{ inputs.arch }}
+            -e LD_LIBRARY_PATH=${{ github.workspace }}/build/lib
+          run_args: |
+            python3 -m pip install -r $(pwd)/tt_metal/python_env/requirements-dev.txt
+            python3 -m pip install -e .
+            ${{ matrix.test-group.cmd }}
       - uses: ./.github/actions/slack-report
         if: ${{ failure() }}
         with:
diff --git a/.github/workflows/fast-dispatch-build-and-unit-tests.yaml b/.github/workflows/fast-dispatch-build-and-unit-tests.yaml
index 8291c3dee52..2c55a940034 100644
--- a/.github/workflows/fast-dispatch-build-and-unit-tests.yaml
+++ b/.github/workflows/fast-dispatch-build-and-unit-tests.yaml
@@ -13,6 +13,10 @@ on:
         required: false
         type: number
         default: 45
+      os:
+        required: false
+        type: string
+        default: "ubuntu-20.04"
   workflow_dispatch:
     inputs:
       arch:
@@ -34,6 +38,10 @@ on:
         required: false
         type: number
         default: 45
+      os:
+        required: false
+        type: string
+        default: "ubuntu-20.04"
 
 jobs:
   fd-tests:
@@ -42,7 +50,7 @@ jobs:
       # so we try not to get hanging machines
       fail-fast: false
       matrix:
-        os: ["ubuntu-20.04"]
+        os: ["${{ inputs.os }}"]
         test-group: [
           {name: eager unit tests 1, cmd: pytest tests/tt_eager/python_api_testing/unit_testing/ -xvvv --splits 7 --group 1 },
           {name: eager unit tests 2, cmd: pytest tests/tt_eager/python_api_testing/unit_testing/ -xvvv --splits 7 --group 2 },
@@ -70,6 +78,7 @@ jobs:
         timeout-minutes: ${{ inputs.timeout }}
         uses: ./.github/actions/docker-run
         with:
+          docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
           install_wheel: true
           docker_password: ${{ secrets.GITHUB_TOKEN }}
           run_args: |