Merge branch 'main' into sabira/ttnn_lenet
sabira-mcw authored Dec 6, 2024
2 parents 7d948f4 + 317d346 commit 28a9bf3
Showing 71 changed files with 4,666 additions and 1,819 deletions.
1 change: 0 additions & 1 deletion .github/workflows/all-post-commit-workflows.yaml
@@ -159,7 +159,6 @@ jobs:
secrets: inherit
with:
os: ubuntu-22.04-amd64
if: github.event_name == 'push'
tt-train-cpp-unit-tests:
needs: build-artifact
secrets: inherit
2 changes: 2 additions & 0 deletions .github/workflows/metal-run-microbenchmarks.yaml
@@ -42,6 +42,8 @@ jobs:
PIPELINE_TYPE="microbenchmarks"
if [ "${{ matrix.runner-info.ccl }}" == "true" ]; then
PIPELINE_TYPE="ccl_microbenchmarks"
else
TT_METAL_SLOW_DISPATCH_MODE=1 ./tests/scripts/run_tunneler_tests.sh --machine-type ${{ matrix.runner-info.runs-on[0] }}
fi
./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type "$PIPELINE_TYPE"
- name: Upload microbenchmark report csvs
23 changes: 0 additions & 23 deletions .github/workflows/test-comment.yaml

This file was deleted.

4 changes: 4 additions & 0 deletions .github/workflows/ttnn-run-sweeps.yaml
@@ -42,9 +42,11 @@ on:
- eltwise.unary.rsqrt.rsqrt_pytorch2
- eltwise.unary.rdiv.rdiv
- eltwise.unary.frac.frac
- eltwise.unary.frac.frac_sharded
- eltwise.unary.ceil.ceil
- eltwise.unary.ceil.ceil_pytorch2
- eltwise.unary.trunc.trunc
- eltwise.unary.trunc.trunc_sharded
- eltwise.unary.floor.floor
- eltwise.unary.floor.floor_pytorch2
- eltwise.unary.clone.clone
@@ -111,6 +113,7 @@ on:
- eltwise.unary.relu_max.relu_max
- eltwise.unary.softplus.softplus
- eltwise.unary.selu.selu
- eltwise.unary.softshrink.softshrink_sharded
- eltwise.unary_backward.fill_zero_bw
- eltwise.unary_backward.log_sigmoid_bw
- eltwise.unary_backward.logit_bw
@@ -180,6 +183,7 @@ on:
- eltwise.unary.mish.mish
- eltwise.unary.mish.mish_sharded
- eltwise.unary.multigammaln.multigammaln
- eltwise.unary.multigammaln.multigammaln_sharded
- eltwise.unary.isfinite.isfinite
- eltwise.unary.isfinite.isfinite_sharded
- eltwise.unary.isinf.isinf
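
Each name in this workflow-dispatch list is a sweep module given as a dotted path; the new eltwise.unary.frac.frac_sharded entry, for example, corresponds to the tests/sweep_framework/sweeps/eltwise/unary/frac/frac_sharded.py file added later in this commit. A minimal sketch of that assumed naming convention (the helper name is hypothetical, not part of the repo):

# Hypothetical helper illustrating the assumed dotted-name -> file-path convention.
def sweep_module_to_path(name: str) -> str:
    # "eltwise.unary.frac.frac_sharded" ->
    # "tests/sweep_framework/sweeps/eltwise/unary/frac/frac_sharded.py"
    return "tests/sweep_framework/sweeps/" + name.replace(".", "/") + ".py"
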
2 changes: 2 additions & 0 deletions CODEOWNERS
@@ -155,6 +155,8 @@ models/demos/t3000/mixtral8x7b @yieldthought @mtairum @uaydonat
models/demos/tg/llama3_70b @cglagovichTT @uaydonat @johanna-rock-tt @djordje-tt @kpaigwar
models/demos/tg/falcon7b @skhorasganiTT @djordje-tt @uaydonat
models/demos/grayskull @uaydonat
models/demos/yolov4 @dvartaniansTT @shwetankTT
models/demos/wormhole/yolov4 @dvartaniansTT @shwetankTT
models/demos/**/*resnet* @mywoodstock @shwetankTT @tt-aho
models/experimental/functional_unet @esmalTT @uaydonat @mywoodstock
models/perf/ @uaydonat
2 changes: 1 addition & 1 deletion models/demos/mnist/tests/test_perf_mnist.py
@@ -112,7 +112,7 @@ def test_perf_device_bare_metal(batch_size, reset_seeds):
num_iterations = 1
margin = 0.03
if is_grayskull():
expected_perf = 390000.0
expected_perf = 402500.0
elif is_wormhole_b0():
expected_perf = 900000.0

4 changes: 2 additions & 2 deletions models/demos/vgg/tests/test_perf_vgg.py
@@ -137,10 +137,10 @@ def test_perf_device_bare_metal_vgg(batch_size, model_name):
margin = 0.03

if model_name == "ttnn_vgg11":
expected_perf = 36 if is_grayskull() else 104
expected_perf = 36 if is_grayskull() else 114
command = f"pytest tests/ttnn/integration_tests/vgg/test_ttnn_vgg11.py"
else:
expected_perf = 34 if is_grayskull() else 90
expected_perf = 34 if is_grayskull() else 105
command = f"pytest tests/ttnn/integration_tests/vgg/test_ttnn_vgg16.py"

cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"]
@@ -416,6 +416,8 @@ def __call__(
hidden_states = ttnn.reshape(
hidden_states, (self.batch_size, 1, self.conv2_input_height * self.conv2_input_width, in_channels)
)
hidden_states = ttnn.reallocate(hidden_states)

hidden_states = ttnn.group_norm(
hidden_states,
num_groups=groups,
3 changes: 1 addition & 2 deletions tests/scripts/run_tt_eager.py
@@ -30,12 +30,11 @@
)

TT_EAGER_COMMON_TEST_ENTRIES = (
void_for_gs(TestEntry("tt_eager/tests/ops/ccl/test_ccl_helpers", "ops/ccl/test_ccl_helpers")),
void_for_gs(TestEntry("tt_eager/tests/ops/ccl/test_ccl_tensor_slicers", "ops/ccl/test_ccl_tensor_slicers")),
TestEntry("tt_eager/tests/ops/test_eltwise_binary_op", "ops/test_eltwise_binary_op"),
TestEntry("tt_eager/tests/ops/test_bcast_op", "ops/test_bcast_op"),
TestEntry("tt_eager/tests/ops/test_transpose_op", "ops/test_transpose_op"),
TestEntry("tt_eager/tests/ops/test_sliding_window_ops", "ops/test_sliding_window_ops"),
TestEntry("tt_eager/tests/ops/test_tensor_utils", "ops/test_tensor_utils"),
TestEntry("tt_eager/tests/ops/test_bmm_op", "ops/test_bmm_op"),
void_for_bh(void_for_whb0(TestEntry("tt_eager/tests/ops/test_eltwise_unary_op", "ops/test_eltwise_unary_op"))),
void_for_whb0(
57 changes: 57 additions & 0 deletions tests/scripts/run_tunneler_tests.sh
@@ -0,0 +1,57 @@
#!/bin/bash

set -eo pipefail

if [[ -z "$TT_METAL_HOME" ]]; then
echo "Must provide TT_METAL_HOME in environment" 1>&2
exit 1
fi

if [[ -z "$TT_METAL_SLOW_DISPATCH_MODE" ]]; then
echo "Must provide TT_METAL_SLOW_DISPATCH_MODE in environment" 1>&2
exit 1
fi

export TT_METAL_CLEAR_L1=1

echo "Running tunneler tests now...";

run_test() {
echo $1
$1
echo
};

run_test_with_watcher() {
echo $1
TT_METAL_WATCHER=1 TT_METAL_WATCHER_NOINLINE=1 $1
echo
};

main() {
# Parse the arguments
while [[ $# -gt 0 ]]; do
case $1 in
--machine-type)
machine_type=$2
shift
;;
*)
echo "Unknown option: $1"
exit 1
;;
esac
shift
done

if [[ $ARCH_NAME == "wormhole_b0" && $machine_type != "N150" ]]; then
for max_packet_size_words in 256 512 1024 2048; do
run_test "./build/test/tt_metal/perf_microbenchmark/routing/test_vc_uni_tunnel --tx_x 4 --tx_y 7 --mux_x 0 --mux_y 7 --demux_x 0 --demux_y 0 --rx_x 0 --rx_y 1 --max_packet_size_words $max_packet_size_words --tx_skip_pkt_content_gen 1 --rx_disable_data_check 1 --rx_disable_header_check 1 --tx_pkt_dest_size_choice 1 --check_txrx_timeout 1 --data_kb_per_tx 1048576 --tunneler_queue_size_bytes 32768 --tx_queue_size_bytes 65536 --rx_queue_size_bytes 131072 --mux_queue_size_bytes 65536 --demux_queue_size_bytes 65536"
run_test "./build/test/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_2ep --tx_x 4 --tx_y 7 --mux_x 0 --mux_y 7 --demux_x 0 --demux_y 0 --rx_x 0 --rx_y 1 --max_packet_size_words $max_packet_size_words --tx_skip_pkt_content_gen 1 --rx_disable_data_check 1 --rx_disable_header_check 1 --tx_pkt_dest_size_choice 1 --check_txrx_timeout 1 --data_kb_per_tx 1048576 --tunneler_queue_size_bytes 32768 --tx_queue_size_bytes 65536 --rx_queue_size_bytes 131072 --mux_queue_size_bytes 65536 --demux_queue_size_bytes 65536"
run_test "./build/test/tt_metal/perf_microbenchmark/routing/test_vc_bi_tunnel_4ep --tx_x 4 --tx_y 7 --mux_x 0 --mux_y 7 --demux_x 0 --demux_y 0 --rx_x 0 --rx_y 1 --max_packet_size_words $max_packet_size_words --tx_skip_pkt_content_gen 1 --rx_disable_data_check 1 --rx_disable_header_check 1 --tx_pkt_dest_size_choice 1 --check_txrx_timeout 1 --data_kb_per_tx 1048576 --tunneler_queue_size_bytes 16384 --tx_queue_size_bytes 65536 --rx_queue_size_bytes 131072 --mux_queue_size_bytes 65536 --demux_queue_size_bytes 65536"
done
fi

}

main "$@"
20 changes: 12 additions & 8 deletions tests/sweep_framework/sweep_utils/sharding_utils.py
@@ -9,10 +9,10 @@
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import _gen_reshape_args_from_volume


def gen_sharded_spec_unary(num_shapes, max_tensor_size=4 * 1024 * 1024, layouts=["TILE_LAYOUT", "ROW_MAJOR_LAYOUT"]):
def gen_sharded_spec_unary(num_shapes, max_tensor_size_per_core=62 * 1024, layouts=["TILE_LAYOUT", "ROW_MAJOR_LAYOUT"]):
# device.compute_with_storage_grid_size()
y = 8
x = 8
Y = 8
X = 8

# ["BLOCK", "WIDTH", "HEIGHT", "tensor_wh"]
sharding_strategy_list = ["BLOCK", "WIDTH", "HEIGHT", "tensor_wh"]
@@ -29,6 +29,10 @@ def gen_sharded_spec_unary(num_shapes, max_tensor_size=4 * 1024 * 1024, layouts=
tensor_hw_as_shard_shape = False

for _ in range(num_shapes):
x = random.randint(1, X)
y = random.randint(1, Y)
max_tensor_size = max_tensor_size_per_core * x * y

if tensor_hw_as_shard_shape:
# Gets stuck:
# X 8 Y 8 input_shape [1, 17792, 8] DataType.BFLOAT8_B Layout.TILE ShardStrategy.BLOCK ShardOrientation.COL_MAJOR tensor_hw_as_shard_shape True
@@ -53,11 +57,6 @@ def gen_sharded_spec_unary(num_shapes, max_tensor_size=4 * 1024 * 1024, layouts=
input_shape[-1] *= 2
input_shape[-2] //= 2

if shard_orientation == "COL_MAJOR":
tmp = input_shape[-2]
input_shape[-2] = input_shape[-1]
input_shape[-1] = tmp

elif sharding_strategy == "BLOCK":
min_shard_size_y = 32 * y
min_shard_size_x = 32 * x
@@ -68,6 +67,11 @@ def gen_sharded_spec_unary(num_shapes, max_tensor_size=4 * 1024 * 1024, layouts=
physical_shape[1] *= min_shard_size_y
physical_shape[0] *= min_shard_size_x

if shard_orientation == "ROW_MAJOR":
tmp = physical_shape[-2]
physical_shape[-2] = physical_shape[-1]
physical_shape[-1] = tmp

input_shape = random.choice(_gen_reshape_args_from_volume(physical_shape[0], step=1, out_dims=rank - 1))
input_shape = list(input_shape["reshape_dims"])
input_shape.append(physical_shape[1])
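
The effect of this change is that the generated tensor-size budget now scales with a randomly drawn shard grid instead of being a fixed cap passed as max_tensor_size. A minimal sketch of the new sizing arithmetic (values mirror the defaults in the updated signature above; illustrative only, not the full generator):

# Illustrative sketch of the per-core sizing introduced above.
import random

X, Y = 8, 8                                  # storage grid assumed by the generator
max_tensor_size_per_core = 62 * 1024         # new default from the updated signature

x = random.randint(1, X)                     # cores used along each grid axis
y = random.randint(1, Y)
max_tensor_size = max_tensor_size_per_core * x * y   # budget grows with the shard grid

print(x, y, max_tensor_size)                 # e.g. a full 8 x 8 grid -> ~3.9 MiB budget
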
109 changes: 109 additions & 0 deletions tests/sweep_framework/sweeps/eltwise/unary/frac/frac_sharded.py
@@ -0,0 +1,109 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple
from functools import partial

import json
import torch
import random
import ttnn
import math
from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm
from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt

from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
from models.utility_functions import torch_random

# Override the default timeout in seconds for hang detection.
TIMEOUT = 120

random.seed(0)


# Parameters provided to the test vector generator are defined here.
# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
# Each suite has a key name (in this case "suite_1" and "suite_2") which will associate the test vectors to this specific suite of inputs.
# Developers can create their own generator functions and pass them to the parameters as inputs.
parameters = {
"nightly": {
"input_spec": gen_sharded_spec_unary(12, layouts=["TILE_LAYOUT"]),
"input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
},
}


# Invalidate vector is called during the generation phase where each vector will be passed in.
# If invalidated, the vector will still be stored but will be skipped.
# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
input_shape, X, Y, sharding_strategy, _, _, input_layout = test_vector["input_spec"].values()
pre_sharded_height = math.prod(input_shape[:-1])
pre_sharded_width = input_shape[-1]

if input_layout == "ROW_MAJOR_LAYOUT":
return True, "Input to eltwise binary must be tilized"

if input_layout == "ROW_MAJOR_LAYOUT" and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
return True, "bfloat8_b is only supported on tiled layout"

return False, None


# This is the run instructions for the test, defined by the developer.
# The run function must take the above-defined parameters as inputs.
# The runner will call this run function with each test vector, and the returned results from this function will be stored.
# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
def run(
input_spec,
input_a_dtype,
*,
device,
) -> list:
data_seed = random.randint(0, 20000000)
torch.manual_seed(data_seed)

(
input_shape,
core_grid,
sharding_strategy,
shard_orientation,
tensor_hw_as_shard_shape,
input_layout,
) = parse_sharding_spec(input_spec)

if input_layout == ttnn.ROW_MAJOR_LAYOUT:
input_shape = sanitize_shape_rm(input_shape)

torch_input_tensor_a = gen_func_with_cast_tt(
partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
)(input_shape)

torch_op = ttnn.get_golden_function(ttnn.frac)
torch_output_tensor = torch_op(torch_input_tensor_a)

sharded_config = ttnn.create_sharded_memory_config_(
shape=input_shape,
core_grid=core_grid,
strategy=sharding_strategy,
orientation=shard_orientation,
use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
)

input_tensor_a = ttnn.from_torch(
torch_input_tensor_a,
dtype=input_a_dtype,
layout=input_layout,
device=device,
memory_config=sharded_config,
)

start_time = start_measuring_time()
output_tensor = ttnn.frac(input_tensor_a, memory_config=sharded_config)
e2e_perf = stop_measuring_time(start_time)
output_tensor = ttnn.to_torch(output_tensor)

pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
return [pcc, e2e_perf]
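
The sweep compares ttnn.frac on a sharded input against the golden function registered for ttnn.frac, with a 0.999 PCC threshold. Assuming that golden follows torch's fractional-part semantics, a device-free sketch of the property being checked (plain torch, illustrative only):

# Device-free reference for what the golden comparison is assumed to check.
import torch

x = torch.randn(32, 32) * 100
golden = torch.frac(x)              # fractional part; sign follows the input
reference = x - torch.trunc(x)      # equivalent definition: x minus its integer part
assert torch.allclose(golden, reference)
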
@@ -29,7 +29,7 @@
# Developers can create their own generator functions and pass them to the parameters as inputs.
parameters = {
"nightly": {
"input_spec": gen_sharded_spec_unary(16, max_tensor_size=2 * 1024 * 1024, layouts=["TILE_LAYOUT"]),
"input_spec": gen_sharded_spec_unary(16, max_tensor_size_per_core=20 * 1024, layouts=["TILE_LAYOUT"]),
"input_a_dtype": [ttnn.bfloat16],
},
}
@@ -29,7 +29,7 @@
# Developers can create their own generator functions and pass them to the parameters as inputs.
parameters = {
"nightly": {
"input_spec": gen_sharded_spec_unary(16, max_tensor_size=1 * 1024 * 1024, layouts=["TILE_LAYOUT"]),
"input_spec": gen_sharded_spec_unary(16, max_tensor_size_per_core=14 * 1024, layouts=["TILE_LAYOUT"]),
"input_a_dtype": [ttnn.bfloat16],
"eps": [0.2], # 0, 10e-6, 10e-4, 10e-2,
},