Skip to content

Commit

Permalink
Dockerize BH pipeline (#15523)
Browse files Browse the repository at this point in the history
### Ticket
Progress towards #14393 

### Problem description
To run our pipeline on Ubuntu 22.04, we first need to dockerize the steps
(otherwise we would need a whole new fleet of build runners).

### What's changed
Run the Blackhole post-commit pipeline within Docker. It still uses Ubuntu
20.04, but this sets the stage for 22.04.

### Checklist
- [ ] Post commit CI passes
https://github.com/tenstorrent/tt-metal/actions/runs/12263532830
- [ ] Blackhole Post commit (if applicable)
https://github.com/tenstorrent/tt-metal/actions/runs/12263536331
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
  • Loading branch information
afuller-TT authored Dec 10, 2024
1 parent 208db3e commit 52dec89
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 16 deletions.
1 change: 1 addition & 0 deletions .github/workflows/all-post-commit-workflows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ jobs:
]
uses: ./.github/workflows/fast-dispatch-build-and-unit-tests.yaml
with:
os: ubuntu-20.04
arch: ${{ matrix.test-group.arch }}
runner-label: ${{ matrix.test-group.runner-label }}
# TTNN FD Unit tests
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/blackhole-post-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
uses: ./.github/workflows/build-artifact.yaml
secrets: inherit
with:
os: "ubuntu-20.04-amd64"
arch: '["blackhole"]'
build-docker: false
build-wheels:
Expand Down Expand Up @@ -57,13 +58,15 @@ jobs:
arch: blackhole
runner-label: BH
timeout: 30
os: "ubuntu-20.04"
fd-unit-tests:
needs: build-wheels
uses: ./.github/workflows/fast-dispatch-build-and-unit-tests.yaml
secrets: inherit
with:
arch: blackhole
runner-label: BH
os: "ubuntu-20.04"
# FD C++ Unit Tests
cpp-unit-tests:
needs: build-artifact
Expand All @@ -73,6 +76,7 @@ jobs:
arch: blackhole
runner-label: BH
timeout: 60
os: "ubuntu-20.04"

# profiler-regression:
# needs: build-artifact-profiler
Expand Down
33 changes: 23 additions & 10 deletions .github/workflows/build-and-unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
required: false
type: number
default: 35
os:
required: false
type: string
default: "ubuntu-20.04"
workflow_dispatch:
inputs:
arch:
Expand All @@ -34,6 +38,11 @@ on:
required: false
type: number
default: 35
os:
required: false
type: string
default: "ubuntu-20.04"

jobs:
unit-tests-slow-dispatch:
name: ${{ inputs.arch }} ${{ inputs.runner-label }}
Expand All @@ -42,24 +51,28 @@ jobs:
- cloud-virtual-machine
- in-service
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ inputs.arch}}
TT_METAL_SLOW_DISPATCH_MODE: 1
ARCH_NAME: ${{ inputs.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
steps:
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
- uses: ./.github/actions/prepare-metal-run
with:
arch: ${{ inputs.arch }}
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- name: Run pre/post regression tests
timeout-minutes: ${{ inputs.timeout }}
run: |
source ${{ github.workspace }}/python_env/bin/activate
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type post_commit --dispatch-mode slow
uses: ./.github/actions/docker-run
with:
docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
docker_password: ${{ secrets.GITHUB_TOKEN }}
docker_opts: |
-e ARCH_NAME=${{ inputs.arch}}
-e TT_METAL_HOME=${{ github.workspace }}
-e TT_METAL_SLOW_DISPATCH_MODE=1
-e LD_LIBRARY_PATH=${{ github.workspace }}/build/lib
run_args: |
python3 -m pip install -r $(pwd)/tt_metal/python_env/requirements-dev.txt
pip install -e .
./tests/scripts/run_tests.sh --tt-arch ${{ inputs.arch }} --pipeline-type post_commit --dispatch-mode slow
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
Expand Down
25 changes: 20 additions & 5 deletions .github/workflows/cpp-post-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
required: false
type: number
default: 80
os:
required: false
type: string
default: "ubuntu-20.04"
workflow_dispatch:
inputs:
arch:
Expand All @@ -34,6 +38,10 @@ on:
required: false
type: number
default: 60
os:
required: false
type: string
default: "ubuntu-20.04"

jobs:
models:
Expand Down Expand Up @@ -67,11 +75,18 @@ jobs:
arch: ${{ inputs.arch }}
- name: ${{ matrix.test-group.name }} tests
timeout-minutes: ${{ inputs.timeout }}
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
${{ matrix.test-group.cmd }}
uses: ./.github/actions/docker-run
with:
docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
docker_password: ${{ secrets.GITHUB_TOKEN }}
docker_opts: |
-e TT_METAL_HOME=${{ github.workspace }}
-e ARCH_NAME=${{ inputs.arch }}
-e LD_LIBRARY_PATH=${{ github.workspace }}/build/lib
run_args: |
python3 -m pip install -r $(pwd)/tt_metal/python_env/requirements-dev.txt
python3 -m pip install -e .
${{ matrix.test-group.cmd }}
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/fast-dispatch-build-and-unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
required: false
type: number
default: 45
os:
required: false
type: string
default: "ubuntu-20.04"
workflow_dispatch:
inputs:
arch:
Expand All @@ -34,6 +38,10 @@ on:
required: false
type: number
default: 45
os:
required: false
type: string
default: "ubuntu-20.04"

jobs:
fd-tests:
Expand All @@ -42,7 +50,7 @@ jobs:
# so we try not to get hanging machines
fail-fast: false
matrix:
os: ["ubuntu-20.04"]
os: ["${{ inputs.os }}"]
test-group: [
{name: eager unit tests 1, cmd: pytest tests/tt_eager/python_api_testing/unit_testing/ -xvvv --splits 7 --group 1 },
{name: eager unit tests 2, cmd: pytest tests/tt_eager/python_api_testing/unit_testing/ -xvvv --splits 7 --group 2 },
Expand Down Expand Up @@ -70,6 +78,7 @@ jobs:
timeout-minutes: ${{ inputs.timeout }}
uses: ./.github/actions/docker-run
with:
docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
install_wheel: true
docker_password: ${{ secrets.GITHUB_TOKEN }}
run_args: |
Expand Down

0 comments on commit 52dec89

Please sign in to comment.