Skip to content

Commit

Permalink
Merge branch 'DM/schedulers' of github.com:tenstorrent/tt-metal into DM/schedulers
Browse files (browse the repository at this point in the history)
  • Loading branch information
dmakoviichuk-tt committed Dec 12, 2024
2 parents d560678 + 47101d4 commit 33b12de
Show file tree
Hide file tree
Showing 437 changed files with 14,238 additions and 5,982 deletions.
4 changes: 4 additions & 0 deletions .github/actions/docker-run/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ inputs:
description: 'Docker image architecture'
required: false
default: tt-metalium/ubuntu-20.04-amd64
docker_version:
description: 'Specify version for the Docker image tag to use.'
required: false
docker_username:
description: docker login username
required: true
Expand Down Expand Up @@ -38,6 +41,7 @@ runs:
uses: ./.github/actions/generate-docker-tag
with:
image: ${{ inputs.docker_os_arch }}
version: ${{ inputs.docker_version }}
- name: Set
shell: bash
run: |
Expand Down
15 changes: 11 additions & 4 deletions .github/actions/generate-docker-tag/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,24 @@ inputs:
description: 'Docker image to run commands in - follows os-arch format'
required: false
default: ubuntu-20.04-amd64

version:
description: 'Docker image version'
required: false
runs:
using: "composite"
steps:
- name: Determine Docker Tag
shell: bash
run: |
if [[ "${GITHUB_REF_NAME}" == "main" ]]; then
echo "IMAGE_TAG=latest" >> $GITHUB_ENV
# If a version was provided, use it; otherwise, determine what the version should be.
if [ "${{ inputs.version }}" != "" ]; then
echo "IMAGE_TAG=${{ inputs.version }}" >> $GITHUB_ENV
else
echo "IMAGE_TAG=dev-${GITHUB_REF_NAME//\//-}" >> $GITHUB_ENV
if [[ "${GITHUB_REF_NAME}" == "main" ]]; then
echo "IMAGE_TAG=latest" >> $GITHUB_ENV
else
echo "IMAGE_TAG=dev-${GITHUB_REF_NAME//\//-}" >> $GITHUB_ENV
fi
fi
- name: Determine Full Docker Image Tag
shell: bash
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/all-post-commit-workflows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ jobs:
]
uses: ./.github/workflows/fast-dispatch-build-and-unit-tests.yaml
with:
os: ubuntu-20.04
arch: ${{ matrix.test-group.arch }}
runner-label: ${{ matrix.test-group.runner-label }}
# TTNN FD Unit tests
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/all-static-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Check kernel count in base metal is less than maximum
run: if (( $(find tt_metal/kernels/ -type f | wc -l) > 7 )); then exit 1; fi
run: if (( $(find tt_metal/kernels/ -type f | wc -l) > 8 )); then exit 1; fi
check-doc:
runs-on: ubuntu-latest
steps:
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/blackhole-post-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
uses: ./.github/workflows/build-artifact.yaml
secrets: inherit
with:
os: "ubuntu-20.04-amd64"
arch: '["blackhole"]'
build-docker: false
build-wheels:
Expand Down Expand Up @@ -57,13 +58,15 @@ jobs:
arch: blackhole
runner-label: BH
timeout: 30
os: "ubuntu-20.04"
fd-unit-tests:
needs: build-wheels
uses: ./.github/workflows/fast-dispatch-build-and-unit-tests.yaml
secrets: inherit
with:
arch: blackhole
runner-label: BH
os: "ubuntu-20.04"
# FD C++ Unit Tests
cpp-unit-tests:
needs: build-artifact
Expand All @@ -73,6 +76,7 @@ jobs:
arch: blackhole
runner-label: BH
timeout: 60
os: "ubuntu-20.04"

# profiler-regression:
# needs: build-artifact-profiler
Expand Down
33 changes: 23 additions & 10 deletions .github/workflows/build-and-unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
required: false
type: number
default: 35
os:
required: false
type: string
default: "ubuntu-20.04"
workflow_dispatch:
inputs:
arch:
Expand All @@ -34,6 +38,11 @@ on:
required: false
type: number
default: 35
os:
required: false
type: string
default: "ubuntu-20.04"

jobs:
unit-tests-slow-dispatch:
name: ${{ inputs.arch }} ${{ inputs.runner-label }}
Expand All @@ -42,24 +51,28 @@ jobs:
- cloud-virtual-machine
- in-service
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ inputs.arch}}
TT_METAL_SLOW_DISPATCH_MODE: 1
ARCH_NAME: ${{ inputs.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
steps:
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
- uses: ./.github/actions/prepare-metal-run
with:
arch: ${{ inputs.arch }}
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- name: Run pre/post regression tests
timeout-minutes: ${{ inputs.timeout }}
run: |
source ${{ github.workspace }}/python_env/bin/activate
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type post_commit --dispatch-mode slow
uses: ./.github/actions/docker-run
with:
docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
docker_password: ${{ secrets.GITHUB_TOKEN }}
docker_opts: |
-e ARCH_NAME=${{ inputs.arch}}
-e TT_METAL_HOME=${{ github.workspace }}
-e TT_METAL_SLOW_DISPATCH_MODE=1
-e LD_LIBRARY_PATH=${{ github.workspace }}/build/lib
run_args: |
python3 -m pip install -r $(pwd)/tt_metal/python_env/requirements-dev.txt
pip install -e .
./tests/scripts/run_tests.sh --tt-arch ${{ inputs.arch }} --pipeline-type post_commit --dispatch-mode slow
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
Expand Down
99 changes: 92 additions & 7 deletions .github/workflows/code-analysis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,20 @@ on:
required: false
type: string
default: "ubuntu-22.04-amd64"
full-scan:
required: false
type: boolean
default: false
workflow_dispatch:
inputs:
os:
required: false
type: string
default: "ubuntu-22.04-amd64"
full-scan:
required: false
type: boolean
default: false

jobs:
build-docker-image:
Expand Down Expand Up @@ -41,15 +49,11 @@ jobs:
echo "::error title=ccache-not-provisioned::Ccache is not properly provisioned."
exit 1
fi
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
echo "RUNNER_UID=$(id -u)" >> $GITHUB_ENV
echo "RUNNER_GID=$(id -g)" >> $GITHUB_ENV
- name: Update submodules
run: |
git submodule update --init --recursive
- name: Generate docker tag
id: generate-docker-tag
uses: ./.github/actions/generate-docker-tag
Expand All @@ -63,6 +67,84 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Pull docker image
run: docker pull ${{ env.TT_METAL_DOCKER_IMAGE_TAG }}

- name: Check out repo
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: recursive
clean: true

- name: Determine merge base
if: github.ref_name != 'main' && !inputs.full-scan
run: |
echo "Current branch: ${{ github.ref_name }}"
MERGE_BASE=$(git merge-base ${{ github.ref_name }} origin/main)
echo "Merge base between ${{ github.ref_name }} and main: $MERGE_BASE"
echo "MERGE_BASE=$MERGE_BASE" >> $GITHUB_ENV
- name: Check out baseline
if: github.ref_name != 'main' && !inputs.full-scan
uses: actions/checkout@v4
with:
ref: ${{ env.MERGE_BASE }}
fetch-depth: 0
submodules: recursive
clean: true

- name: Create baseline
if: github.ref_name != 'main' && !inputs.full-scan
uses: addnab/docker-run-action@v3
with:
image: ${{ env.TT_METAL_DOCKER_IMAGE_TAG }}
options: |
--rm
--tmpfs /tmp
-u ${{ env.RUNNER_UID }}:${{ env.RUNNER_GID }}
--group-add 1457
-v ${{ github.workspace }}:${{ github.workspace }}
-v /etc/passwd:/etc/passwd:ro
-v /etc/shadow:/etc/shadow:ro
-v /etc/bashrc:/etc/bashrc:ro
-v /home/ubuntu/.ccache-ci:/home/ubuntu/.ccache
-v /mnt/MLPerf/ccache:/mnt/MLPerf/ccache
-e ARCH_NAME=${{ env.ARCH_NAME }}
-e CARGO_HOME=${{ github.workspace }}/.cargo
-w ${{ github.workspace }}
run: |
set -eu # basic shell hygiene
# /tmp is a tmpfs; more efficient than persisted storage
mkdir -p /tmp/ccache
export CCACHE_TEMPDIR=/tmp/ccache
# Zero out the stats so we can see how we did this build
# NOTE: may be inaccurate if we have >1 build runner on the same machine, using the same local cache
ccache -z
# Suppress clang-tidy to first get an up-to-date build tree
ln -sf /usr/bin/true ./clang-tidy-shim
cmake --preset clang-tidy -DCMAKE_CXX_CLANG_TIDY=$(pwd)/clang-tidy-shim -DCMAKE_C_CLANG_TIDY=$(pwd)/clang-tidy-shim
nice -n 19 cmake --build --preset clang-tidy
mkdir -p out
ccache -s > out/ccache.stats
- name: Publish Ccache summary
if: github.ref_name != 'main' && !inputs.full-scan
run: |
echo '## CCache Summary (baseline)' >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
cat out/ccache.stats >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
- name: Checkout repo
uses: actions/checkout@v4
with:
submodules: recursive
clean: false

- name: Analyze code with clang-tidy
uses: addnab/docker-run-action@v3
with:
Expand Down Expand Up @@ -92,10 +174,13 @@ jobs:
# NOTE: may be inaccurate if we have >1 build runner on the same machine, using the same local cache
ccache -z
cmake --preset clang-tidy
# cmake -B .build/clang-tidy -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_CXX_CLANG_TIDY=clang-tidy-17 -DTT_UNITY_BUILDS=FALSE -DCMAKE_DISABLE_PRECOMPILE_HEADERS=TRUE -DENABLE_CCACHE=TRUE -DTT_METAL_BUILD_TESTS=TRUE -DTTNN_BUILD_TESTS=TRUE -DBUILD_PROGRAMMING_EXAMPLES=TRUE -DBUILD_TT_TRAIN=TRUE
# Restore shim to legit clang-tidy
# Symlink tomfoolery here so that Ninja believes the build command has not changed from the previous run
ln -sf $(which clang-tidy-17) ./clang-tidy-shim
cmake --preset clang-tidy -DCMAKE_CXX_CLANG_TIDY=$(pwd)/clang-tidy-shim -DCMAKE_C_CLANG_TIDY=$(pwd)/clang-tidy-shim
nice -n 19 cmake --build --preset clang-tidy
mkdir out
mkdir -p out
ccache -s > out/ccache.stats
- name: Publish Ccache summary
run: |
Expand Down
25 changes: 20 additions & 5 deletions .github/workflows/cpp-post-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
required: false
type: number
default: 80
os:
required: false
type: string
default: "ubuntu-20.04"
workflow_dispatch:
inputs:
arch:
Expand All @@ -34,6 +38,10 @@ on:
required: false
type: number
default: 60
os:
required: false
type: string
default: "ubuntu-20.04"

jobs:
models:
Expand Down Expand Up @@ -67,11 +75,18 @@ jobs:
arch: ${{ inputs.arch }}
- name: ${{ matrix.test-group.name }} tests
timeout-minutes: ${{ inputs.timeout }}
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
${{ matrix.test-group.cmd }}
uses: ./.github/actions/docker-run
with:
docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
docker_password: ${{ secrets.GITHUB_TOKEN }}
docker_opts: |
-e TT_METAL_HOME=${{ github.workspace }}
-e ARCH_NAME=${{ inputs.arch }}
-e LD_LIBRARY_PATH=${{ github.workspace }}/build/lib
run_args: |
python3 -m pip install -r $(pwd)/tt_metal/python_env/requirements-dev.txt
python3 -m pip install -e .
${{ matrix.test-group.cmd }}
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/fast-dispatch-build-and-unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
required: false
type: number
default: 45
os:
required: false
type: string
default: "ubuntu-20.04"
workflow_dispatch:
inputs:
arch:
Expand All @@ -34,6 +38,10 @@ on:
required: false
type: number
default: 45
os:
required: false
type: string
default: "ubuntu-20.04"

jobs:
fd-tests:
Expand All @@ -42,7 +50,7 @@ jobs:
# so we try not to get hanging machines
fail-fast: false
matrix:
os: ["ubuntu-20.04"]
os: ["${{ inputs.os }}"]
test-group: [
{name: eager unit tests 1, cmd: pytest tests/tt_eager/python_api_testing/unit_testing/ -xvvv --splits 7 --group 1 },
{name: eager unit tests 2, cmd: pytest tests/tt_eager/python_api_testing/unit_testing/ -xvvv --splits 7 --group 2 },
Expand Down Expand Up @@ -70,6 +78,7 @@ jobs:
timeout-minutes: ${{ inputs.timeout }}
uses: ./.github/actions/docker-run
with:
docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
install_wheel: true
docker_password: ${{ secrets.GITHUB_TOKEN }}
run_args: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/publish-release-image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ jobs:
uses: ./.github/actions/docker-run
with:
docker_os_arch: tt-metalium-${{ matrix.os }}-amd64-release/${{ matrix.test_group.arch }}
docker_version: ${{ inputs.version }}
docker_password: ${{ secrets.GITHUB_TOKEN }}
run_args: |
${{ matrix.test_group.cmd }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/t3000-frequent-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
{ name: "t3k llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k n300 mesh llama3.2-vision tests", arch: wormhole_b0, cmd: run_t3000_spoof_n300_llama3.2-11b-vision_freq_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "t3k llama3 tests", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k llama3 accuracy tests", arch: wormhole_b0, cmd: run_t3000_llama3_accuracy_tests, timeout: 45, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
{ name: "t3k llama2_70b tests", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich
# { name: "t3k llama3_70b tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 45, owner_id: U03FJB5TM5Y}, #Colman Glagovich # FIXME issue #14934
{ name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, #Miguel Tairum Cruz
Expand Down
Loading

0 comments on commit 33b12de

Please sign in to comment.