Skip to content

Commit

Permalink
Merge branch 'main' into DM/schedulers
Browse files Browse the repository at this point in the history
  • Loading branch information
dmakoviichuk-tt authored Dec 9, 2024
2 parents 31a5ecb + e3526de commit f100b80
Show file tree
Hide file tree
Showing 1,302 changed files with 59,284 additions and 32,681 deletions.
52 changes: 26 additions & 26 deletions .clang-format-ignore
Original file line number Diff line number Diff line change
@@ -1,26 +1,3 @@
tests/tt_eager/ops/ccl/test_ccl_reduce_scatter_host_helpers.cpp
tests/tt_metal/distributed/test_distributed.cpp
tests/tt_metal/tt_metal/common/device_fixture.hpp
tests/tt_metal/tt_metal/eth/test_erisc_app_direct_send.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_rx.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_tx.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_bi_tunnel.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_common.hpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_mux_demux.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_mux_demux_2level.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_tunnel_1cq.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_tunnel_2cq.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_tx_rx.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_uni_tunnel.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_uni_tunnel_single_chip.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_mux_demux.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_uni_tunnel.cpp
tests/tt_metal/tt_metal/test_compile_sets_kernel_binaries.cpp
tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_erisc_datamover_sender_worker_sender.cpp
tests/ttnn/unit_tests/gtests/ccl/kernels/fabric_worker_sender_multi_input.cpp
tests/ttnn/unit_tests/gtests/ccl/kernels/test_kernels.common.hpp
tests/ttnn/unit_tests/gtests/ccl/test_fabric_erisc_data_mover_loopback_with_workers.cpp
tests/ttnn/unit_tests/gtests/test_graph_basic.cpp
tt_metal/distributed/mesh_device.cpp
tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_unpack_tilize_api.h
tt_metal/impl/buffers/buffer.cpp
Expand Down Expand Up @@ -54,7 +31,6 @@ tt_metal/llrt/rtoptions.hpp
tt_metal/llrt/tt_cluster.cpp
tt_metal/llrt/tt_cluster.hpp
tt_metal/llrt/tt_memory.h
tt_metal/tt_metal.cpp
ttnn/cpp/pybind11/tensor.cpp
ttnn/cpp/ttnn/device_operation.hpp
ttnn/cpp/ttnn/graph/graph_processor.cpp
Expand Down Expand Up @@ -123,9 +99,33 @@ ttnn/cpp/ttnn/operations/embedding_backward/device/embedding_backward_device_ope
ttnn/cpp/ttnn/operations/embedding_backward/device/embedding_backward_device_operation.hpp
ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/device/reduce_scatter_async_program.cpp
ttnn/cpp/ttnn/operations/experimental/matmul/group_attn_matmul/device/kernels/compute/transformer_group_attn_matmul.cpp
ttnn/cpp/ttnn/operations/pool/maxpool/max_pool2d.cpp
ttnn/cpp/ttnn/operations/pool/generic/generic_pools.cpp
ttnn/cpp/ttnn/operations/uniform/device/uniform_device_operation.cpp
ttnn/cpp/ttnn/operations/uniform/device/uniform_device_operation.hpp
ttnn/cpp/ttnn/tensor/types.hpp

tests/*
tests/tt_metal/test_utils/env_vars.hpp
tests/tt_metal/tt_metal/api/allocator/test_free_list_opt_allocator.cpp
tests/tt_metal/tt_metal/api/test_global_semaphores.cpp
tests/tt_metal/tt_metal/dispatch/sub_device_test_utils.hpp
tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/test_dram_read_remote_cb.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/test_remote_cb_sync_matmul.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_rx.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_tx.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_common.hpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_mux_demux.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_mux_demux_2level.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_tx_rx.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_2ep.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_4ep.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_loopback_tunnel.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_mux_demux.cpp
tests/tt_metal/tt_metal/perf_microbenchmark/routing/test_vc_uni_tunnel.cpp
tests/ttnn/unit_tests/gtests/test_ccl_on_galaxy.cpp

# Suspicious Formatting Could Cause Issues
tests/tt_metal/tt_metal/debug_tools/*
tests/tt_metal/tt_metal/test_kernels/misc/watcher_asserts.cpp
tests/tt_metal/tt_metal/test_kernels/misc/watcher_pause.cpp
tests/tt_metal/tt_metal/test_kernels/misc/watcher_ringbuf.cpp
tests/tt_metal/tt_metal/test_kernels/misc/watcher_waypoints.cpp
5 changes: 3 additions & 2 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# CMake formatting
0be480e4450b902174f8c1f03559a8fc6eebb827
# clang-format-ttnn
# clang-format
352e61a97c8d843ac4120aec196e6e062879e98c
# clang-format-ttmetal
effbfe00f94b5a46f51f923bee56d6d8044afd01
1758447481544acd234d1ab7579674bf2429fdd0
94084b94fbfab32e8d6248255f1dfc166bf80255
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

/cmake/version.cmake export-subst
4 changes: 2 additions & 2 deletions .github/actions/docker-run/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ inputs:
docker_os_arch:
description: 'Docker image architecture'
required: false
default: ubuntu-20.04-amd64
default: tt-metalium/ubuntu-20.04-amd64
docker_username:
description: docker login username
required: true
Expand Down Expand Up @@ -89,7 +89,7 @@ runs:
set -eu
install_wheel=${{ inputs.install_wheel }}
if [[ "${install_wheel,,}" == "true" ]]; then
if [ "${install_wheel,,}" == "true" ]; then
WHEEL_FILENAME=$(ls -1 *.whl)
pip3 install "$WHEEL_FILENAME"
fi
Expand Down
4 changes: 2 additions & 2 deletions .github/actions/generate-docker-tag/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ runs:
- name: Determine Full Docker Image Tag
shell: bash
run: |
echo "TT_METAL_DOCKER_IMAGE_TAG=ghcr.io/${{ github.repository }}/tt-metalium/${{ inputs.image }}:${{ env.IMAGE_TAG }}" >> $GITHUB_ENV
echo "TT_METAL_REF_IMAGE_TAG=ghcr.io/${{ github.repository }}/tt-metalium/${{ inputs.image }}:latest" >> $GITHUB_ENV
echo "TT_METAL_DOCKER_IMAGE_TAG=ghcr.io/${{ github.repository }}/${{ inputs.image }}:${{ env.IMAGE_TAG }}" >> $GITHUB_ENV
echo "TT_METAL_REF_IMAGE_TAG=ghcr.io/${{ github.repository }}/${{ inputs.image }}:latest" >> $GITHUB_ENV
- name: Output Docker Image Tag
shell: bash
run: |
Expand Down
1 change: 1 addition & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ Summarize the changes made and its impact.
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] **(For models and ops writers)** Full [new models](https://github.com/tenstorrent/tt-metal/actions/workflows/full-new-models-suite.yaml) tests pass
- [ ] New/Existing tests provide coverage for changes
3 changes: 3 additions & 0 deletions .github/workflows/_produce-data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ on:
- "(Single-card) Model perf tests"
- "(Single-card) Device perf tests"
- "(Single-card) Demo tests"
- "(Single-card) Tests for new models"
- "Nightly fast dispatch tests"
- "(Single-card) Tests for new models"
- "(T3K) T3000 demo tests"
- "(T3K) T3000 model perf tests"
- "(T3K) T3000 perplexity tests"
Expand All @@ -39,6 +41,7 @@ on:
- "(TGG) TGG frequent tests"
- "ttnn - Run sweeps"
- "Blackhole post-commit tests"
- "Custom test dispatch"
types:
- completed

Expand Down
20 changes: 5 additions & 15 deletions .github/workflows/all-post-commit-workflows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ name: "All post-commit tests"

on:
workflow_call:
inputs:
build-type:
required: false
default: Release
type: string
workflow_dispatch:
inputs:
build-type:
Expand Down Expand Up @@ -73,21 +78,6 @@ jobs:
build-docker: false
build-type: ${{ inputs.build-type || 'Release' }}
secrets: inherit
# UMD Unit Tests
umd-unit-tests:
secrets: inherit
strategy:
fail-fast: false
matrix:
test-group: [
{ arch: grayskull, runner-label: E150 },
{ arch: wormhole_b0, runner-label: N150 },
{ arch: wormhole_b0, runner-label: N300 },
]
uses: ./.github/workflows/umd-unit-tests.yaml
with:
arch: ${{ matrix.test-group.arch }}
runner-label: ${{ matrix.test-group.runner-label }}
# Slow Dispatch Unit Tests
sd-unit-tests:
needs: build-artifact
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-artifact.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:
id: generate-docker-tag
uses: ./.github/actions/generate-docker-tag
with:
image: ${{ inputs.os }}
image: tt-metalium/${{ inputs.os }}
- name: Docker login
uses: docker/login-action@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-docker-artifact.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
- name: Determine docker image tag
uses: ./.github/actions/generate-docker-tag
with:
image: ${{ inputs.os }}
image: tt-metalium/${{ inputs.os }}
- name: Build Docker image and push to GHCR
if: steps.changed-files-specific.outputs.any_changed == 'true'
uses: docker/build-push-action@v6
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,13 @@ jobs:
with:
docker_username: ${{ github.actor }}
docker_password: ${{ secrets.GITHUB_TOKEN }}
docker_image_arch: ${{ inputs.arch }}
docker_opts: |
-e ARCH_NAME=${{ matrix.arch }}
--group-add 1457
-v /home/ubuntu/.ccache-ci:/home/ubuntu/.ccache
-e CCACHE_DIR=/home/ubuntu/.ccache
-v /mnt/MLPerf/ccache:/mnt/MLPerf/ccache
docker_os_arch: ${{ matrix.build.os }}-amd64
docker_os_arch: tt-metalium/${{ matrix.build.os }}-amd64
run_args: |
set -eu # basic shell hygiene
set -x
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/code-analysis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
id: generate-docker-tag
uses: ./.github/actions/generate-docker-tag
with:
image: ${{ inputs.os }}
image: tt-metalium/${{ inputs.os }}
- name: Docker login
uses: docker/login-action@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/cpp-ttnn-project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
id: generate-docker-tag
uses: ./.github/actions/generate-docker-tag
with:
image: ubuntu-22.04-amd64
image: tt-metalium/ubuntu-22.04-amd64
- name: Docker login
uses: docker/login-action@v3
with:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ jobs:
fail-fast: false
matrix:
test-config:
- model: "wh_b0_unstable"
cmd: ./tests/scripts/single_card/nightly/run_wh_b0_unstable.sh
- model: "stable_diffusion"
cmd: pytest --timeout 900 -n auto tests/nightly/single_card/stable_diffusion
- model: "mamba 1"
cmd: pytest --timeout 900 -n auto tests/nightly/single_card/mamba --splits 6 --group 1
- model: "mamba 2"
Expand Down
60 changes: 60 additions & 0 deletions .github/workflows/full-new-models-suite.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
name: "(Single-card) Tests for new models"

on:
workflow_dispatch:
inputs:
build-type:
required: false
default: Release
type: choice
options:
- Release
- Debug
- RelWithDebInfo
- CI

permissions:
actions: read
contents: write
pull-requests: write
pages: write
id-token: write
packages: write

jobs:
build-docker-image-2004:
uses: ./.github/workflows/build-docker-artifact.yaml
secrets: inherit
with:
os: ubuntu-20.04-amd64
build-artifact:
needs: build-docker-image-2004
uses: ./.github/workflows/build-artifact.yaml
secrets: inherit
with:
build-docker: false
build-type: ${{ inputs.build-type || 'Release' }}
build-artifact-profiler:
needs: build-docker-image-2004
uses: ./.github/workflows/build-artifact.yaml
with:
tracy: true
build-docker: false
build-type: ${{ inputs.build-type || 'Release' }}
secrets: inherit
device-perf-single-card:
needs: build-artifact-profiler
uses: ./.github/workflows/perf-device-models-impl.yaml
secrets: inherit
e2e-model-perf-single-card:
needs: build-artifact
uses: ./.github/workflows/perf-models-impl.yaml
secrets: inherit
nightly-single-card:
needs: build-artifact
uses: ./.github/workflows/fast-dispatch-full-regressions-and-models-impl.yaml
secrets: inherit
demos-single-card:
needs: build-artifact
uses: ./.github/workflows/single-card-demo-tests-impl.yaml
secrets: inherit
10 changes: 9 additions & 1 deletion .github/workflows/metal-run-microbenchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ jobs:
# {arch: wormhole_b0, runs-on: ["pipeline-perf", "N150", "bare-metal", "in-service"]},
# N300
{arch: wormhole_b0, runs-on: ["N300", "pipeline-perf", "bare-metal", "in-service"]},
{arch: wormhole_b0, runs-on: ["N300", "pipeline-perf", "bare-metal", "in-service"], ccl: true},
]
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
Expand All @@ -37,7 +38,14 @@ jobs:
./create_venv.sh
- name: Run microbenchmark tests
timeout-minutes: 90
run: ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type microbenchmarks
run: |
PIPELINE_TYPE="microbenchmarks"
if [ "${{ matrix.runner-info.ccl }}" == "true" ]; then
PIPELINE_TYPE="ccl_microbenchmarks"
else
TT_METAL_SLOW_DISPATCH_MODE=1 ./tests/scripts/run_tunneler_tests.sh --machine-type ${{ matrix.runner-info.runs-on[0] }}
fi
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type "$PIPELINE_TYPE"
- name: Upload microbenchmark report csvs
uses: actions/upload-artifact@v4
with:
Expand Down
28 changes: 6 additions & 22 deletions .github/workflows/package-and-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -180,32 +180,16 @@ jobs:
infra/machine_setup/scripts/setup_hugepages.py
metal_libs-*+*.whl
fail_on_unmatched_files: true
create-docker-image:
create-docker-release-image:
needs: [
create-tag,
create-and-upload-draft-release
]
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: https://ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
env:
TT_METAL_DOCKER_IMAGE: tt-metalium/ubuntu-20.04-amd64
uses: docker/build-push-action@v6
with:
push: true
tags: ghcr.io/${{ github.repository }}/tt-metalium/ubuntu-20.04-amd64:${{ needs.create-tag.outputs.version }}-dev
context: .
file: dockerfile/ubuntu-20.04-amd64.Dockerfile
uses: ./.github/workflows/publish-release-image.yaml
secrets: inherit
with:
version: ${{ needs.create-tag.outputs.version }}
is_major_version: ${{ needs.get-params.outputs.is-release-candidate !='true' && needs.get-params.outputs.should-create-release == 'true' }}
release-docs:
needs: [
get-params,
Expand Down
30 changes: 23 additions & 7 deletions .github/workflows/publish-release-image-wrapper.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
name: "Create and Publish Release Docker Image"

on:
workflow_call:
workflow_dispatch:

jobs:
to_be_filled_out:
steps:
- name: This workflow will be filled out in https://github.com/tenstorrent/tt-metal/pull/15013
run: |
echo "NOOP"
build-artifact:
uses: ./.github/workflows/build-artifact.yaml
secrets: inherit
build-wheels:
needs: build-artifact
strategy:
matrix:
# Since pre-compiled builds only run on 20.04, we can only test on 20.04 for now.
# The full 22.04 flow can be tested without precompiled builds.
os: [ubuntu-20.04]
arch: [grayskull, wormhole_b0]
uses: ./.github/workflows/_build-wheels-impl.yaml
with:
os: ${{ matrix.os }}
arch: ${{ matrix.arch }}
from-precompiled: true
publish-release-image:
needs: build-wheels
uses: ./.github/workflows/publish-release-image.yaml
secrets: inherit
with:
version: dev-${GITHUB_REF_NAME//\//-}
is_major_version: false
Loading

0 comments on commit f100b80

Please sign in to comment.