Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#11512: Add sweeps for eltwise sharded ops #16003

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/ttnn-run-sweeps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ on:
- eltwise.unary.tril.tril_pytorch2
- eltwise.unary.clamp.clamp
- eltwise.unary.clamp.clamp_forge
- eltwise.unary.clamp.clamp_sharded
- eltwise.unary.clamp.clamp_pytorch2
- eltwise.unary.clamp.clamp_min_pytorch2
- eltwise.unary.clip.clip
- eltwise.unary.cbrt.cbrt
- eltwise.unary.cbrt.cbrt_sharded
- eltwise.unary.rsub.rsub
- eltwise.unary.rsub.rsub_pytorch2
- eltwise.unary.rsqrt.rsqrt_pytorch2
Expand All @@ -56,6 +58,7 @@ on:
- eltwise.unary.trunc.trunc_sharded
- eltwise.unary.floor.floor
- eltwise.unary.floor.floor_forge
- eltwise.unary.floor.floor_sharded
- eltwise.unary.floor.floor_pytorch2
- eltwise.unary.clone.clone
- eltwise.unary.elu.elu
Expand Down Expand Up @@ -216,6 +219,7 @@ on:
- eltwise.unary.isneginf.isneginf_sharded
- eltwise.unary.isposinf.isposinf
- eltwise.unary.isposinf.isposinf_sharded
- eltwise.binary.add.add_sharded
- eltwise.binary.add.add_all_pytorch2
- eltwise.binary.add.add_set2_pytorch2
- eltwise.binary.add.add_different_memory_configs
Expand All @@ -235,14 +239,17 @@ on:
- eltwise.unary_complex.angle_bw.angle_bw
- eltwise.unary_complex.conj_bw
- eltwise.binary.subtract.subtract
- eltwise.binary.subtract.subtract_sharded
- eltwise.binary.subtract.subtract_tensor_pytorch2
- eltwise.binary.multiply.multiply
- eltwise.binary.multiply.mul_tensor_pytorch2
- eltwise.binary.multiply.multiply_scalar_pytorch2
- eltwise.binary.div.div
- eltwise.binary.div.div_sharded
- eltwise.binary.div.div_tensor_pytorch2
- eltwise.binary.div.div_forge
- eltwise.binary.div_no_nan.div_no_nan
- eltwise.binary.div_no_nan.div_no_nan_sharded
- eltwise.binary.logical_or.logical_or_
- eltwise.binary.logical_or.logical_or
- eltwise.binary.logical_or.logical_or_output
Expand Down
121 changes: 121 additions & 0 deletions tests/sweep_framework/sweeps/eltwise/binary/add/add_sharded.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple
from functools import partial

import json
import torch
import random
import ttnn
import math
from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm
from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf

from tests.sweep_framework.sweep_utils.sharding_utils import (
gen_sharded_spec_unary,
parse_sharding_spec,
invalidate_vector_sharding,
)
from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
from models.utility_functions import torch_random

# Override the default timeout in seconds for hang detection.
TIMEOUT = 120

# Seed Python's `random` module at import time so that values drawn from it
# (for example the per-vector data seed picked inside run()) are repeatable.
random.seed(0)


# Parameters provided to the test vector generator are defined here.
# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
# Each suite has a key name (here the only suite is "nightly") which will associate the test vectors to this specific suite of inputs.
# Developers can create their own generator functions and pass them to the parameters as inputs.
# NOTE(review): the first argument to gen_sharded_spec_unary (16) is presumably the number of
# sharding specs to generate - confirm against sharding_utils.
parameters = {
"nightly": {
"input_spec": gen_sharded_spec_unary(16, layouts=["TILE_LAYOUT"]), # add op only supports tiled layout
"input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
"input_b_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
},
}


# Invalidate vector is called during the generation phase where each vector will be passed in.
# If invalidated, the vector will still be stored but will be skipped.
# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
    """Decide whether a generated test vector has to be skipped.

    Args:
        test_vector: Mapping holding at least an ``"input_spec"`` entry, which in
            turn carries an ``"input_layout"`` key.

    Returns:
        ``(True, reason)`` when the vector is invalid, ``(False, None)`` otherwise.
    """
    spec = test_vector["input_spec"]
    layout = spec["input_layout"]
    # The sharding check is evaluated before the layout check, even though the
    # layout verdict takes precedence when both reject the vector.
    bad_sharding, sharding_reason = invalidate_vector_sharding(spec)

    if layout == "ROW_MAJOR_LAYOUT":
        return True, "Inputs to eltwise binary must be tilized"
    return (bad_sharding, sharding_reason) if bad_sharding else (False, None)


# These are the run instructions for the test, defined by the developer.
# The run function must take the above-defined parameters as inputs.
# The runner will call this run function with each test vector, and the returned results from this function will be stored.
# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
def run(
    input_spec,
    input_a_dtype,
    input_b_dtype,
    *,
    device,
) -> list:
    """Execute one sharded ``ttnn.add`` test vector and score it against the golden function.

    Args:
        input_spec: Sharding spec, unpacked with ``parse_sharding_spec``.
        input_a_dtype: ttnn dtype used for the first operand.
        input_b_dtype: ttnn dtype used for the second operand.
        device: Device handle supplied by the sweep runner.

    Returns:
        ``[pcc_result, e2e_perf]`` where ``pcc_result`` comes from ``check_with_pcc``
        (threshold 0.999) and ``e2e_perf`` is the time measured around the ``ttnn.add`` call.
    """
    # Draw a per-vector seed from the module-level RNG and hand it to torch.
    torch.manual_seed(random.randint(0, 20000000))

    (
        shape,
        grid,
        strategy,
        orientation,
        hw_as_shard_shape,
        layout,
        height_mul_of_32,
    ) = parse_sharding_spec(input_spec)

    def make_host_tensor(dtype):
        # Random float32 data drawn with low=-100, high=100, cast through the requested ttnn dtype.
        generator = gen_func_with_cast_tt(partial(torch_random, low=-100, high=100, dtype=torch.float32), dtype)
        return generator(shape)

    host_a = make_host_tensor(input_a_dtype)
    host_b = make_host_tensor(input_b_dtype)
    expected = ttnn.get_golden_function(ttnn.add)(host_a, host_b)

    # Both inputs and the output share the same sharded memory configuration.
    sharded_config = ttnn.create_sharded_memory_config_(
        shape=shape,
        core_grid=grid,
        strategy=strategy,
        orientation=orientation,
        use_height_and_width_as_shard_shape=hw_as_shard_shape,
        tile_layout=height_mul_of_32,
    )

    def to_device(host_tensor, dtype):
        return ttnn.from_torch(
            host_tensor,
            dtype=dtype,
            layout=layout,
            device=device,
            memory_config=sharded_config,
        )

    device_a = to_device(host_a, input_a_dtype)
    device_b = to_device(host_b, input_b_dtype)

    # Only the op itself is timed; conversion back to torch happens afterwards.
    timer = start_measuring_time()
    device_result = ttnn.add(device_a, device_b, memory_config=sharded_config)
    e2e_perf = stop_measuring_time(timer)
    actual = ttnn.to_torch(device_result)

    return [check_with_pcc(expected, actual, 0.999), e2e_perf]
141 changes: 141 additions & 0 deletions tests/sweep_framework/sweeps/eltwise/binary/div/div_sharded.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple
from functools import partial

import json
import torch
import random
import ttnn
import math

import ttnn.device
from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm
from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf

from tests.sweep_framework.sweep_utils.sharding_utils import (
gen_sharded_spec_unary,
parse_sharding_spec,
invalidate_vector_sharding,
)
from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
from models.utility_functions import torch_random

# Override the default timeout in seconds for hang detection.
TIMEOUT = 120

# Seed Python's `random` module at import time so that values drawn from it
# (for example the per-vector data seed picked inside run()) are repeatable.
random.seed(0)


# Parameters provided to the test vector generator are defined here.
# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
# Each suite has a key name (here the only suite is "nightly") which will associate the test vectors to this specific suite of inputs.
# Developers can create their own generator functions and pass them to the parameters as inputs.
# NOTE(review): the first argument to gen_sharded_spec_unary (2) is presumably the number of
# sharding specs to generate - confirm against sharding_utils.
parameters = {
"nightly": {
"input_spec": gen_sharded_spec_unary(2, layouts=["TILE_LAYOUT"]), # div op only supports tiled layout
"input_a_dtype": [ttnn.bfloat16],
"input_b_dtype": [ttnn.bfloat16],
"accurate_mode": [True, False],
"round_mode": [None, "floor", "trunc"],
},
}


# Invalidate vector is called during the generation phase where each vector will be passed in.
# If invalidated, the vector will still be stored but will be skipped.
# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
    """Report whether a generated test vector must be skipped, and why.

    Args:
        test_vector: Mapping holding at least an ``"input_spec"`` entry, which in
            turn carries an ``"input_layout"`` key.

    Returns:
        ``(True, reason)`` for an invalid vector, ``(False, None)`` for a valid one.
    """
    input_spec = test_vector["input_spec"]
    requested_layout = input_spec["input_layout"]
    # The sharding validation runs first; its verdict is only consulted once the
    # layout has been accepted.
    sharding_rejected, rejection_reason = invalidate_vector_sharding(input_spec)

    if requested_layout == "ROW_MAJOR_LAYOUT":
        return True, "Inputs to eltwise binary must be tilized"
    if not sharding_rejected:
        return False, None
    return sharding_rejected, rejection_reason


# These are the run instructions for the test, defined by the developer.
# The run function must take the above-defined parameters as inputs.
# The runner will call this run function with each test vector, and the returned results from this function will be stored.
# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
def run(
    input_spec,
    input_a_dtype,
    input_b_dtype,
    accurate_mode,
    round_mode,
    *,
    device,
) -> list:
    """Execute one sharded ``ttnn.div`` test vector and score it against the golden function.

    Args:
        input_spec: Sharding spec, unpacked with ``parse_sharding_spec``.
        input_a_dtype: ttnn dtype used for the dividend.
        input_b_dtype: ttnn dtype used for the divisor.
        accurate_mode: Forwarded to ``ttnn.div``; also selects how the divisor is generated.
        round_mode: ``None``, ``"floor"`` or ``"trunc"``; forwarded to both ``ttnn.div``
            and the golden function so that the reference uses the same rounding.
        device: Device handle supplied by the sweep runner.

    Returns:
        ``[pcc_result, e2e_perf]`` where ``pcc_result`` comes from ``check_with_pcc``
        (threshold 0.999) and ``e2e_perf`` is the time measured around the ``ttnn.div`` call.
    """
    # Draw a per-vector seed from the module-level RNG and hand it to torch.
    data_seed = random.randint(0, 20000000)
    torch.manual_seed(data_seed)

    (
        input_shape,
        core_grid,
        sharding_strategy,
        shard_orientation,
        tensor_hw_as_shard_shape,
        input_layout,
        shard_height_mul_of_32,
    ) = parse_sharding_spec(input_spec)

    torch_input_tensor_a = gen_func_with_cast_tt(
        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
    )(input_shape)

    if not accurate_mode:
        # Divisor magnitudes are drawn with low=0.1 and then given a random sign, which keeps
        # zeros out of the divisor.
        # NOTE(review): presumably the non-accurate path does not handle division by zero - confirm.
        torch_input_tensor_b = gen_func_with_cast_tt(
            partial(torch_random, low=0.1, high=100, dtype=torch.float32), input_b_dtype
        )(input_shape)
        signs_b = torch.randint(0, 2, input_shape) * 2 - 1  # each element is -1 or +1
        torch_input_tensor_b *= signs_b
    else:
        torch_input_tensor_b = gen_func_with_cast_tt(
            partial(torch_random, low=-100, high=100, dtype=torch.float32), input_b_dtype
        )(input_shape)

    # The reference must apply the same rounding as the device op; without round_mode the
    # "floor" and "trunc" vectors would be compared against plain true division.
    golden_function = ttnn.get_golden_function(ttnn.div)
    torch_output_tensor = golden_function(torch_input_tensor_a, torch_input_tensor_b, round_mode=round_mode)

    # Both inputs and the output share the same sharded memory configuration.
    sharded_config = ttnn.create_sharded_memory_config_(
        shape=input_shape,
        core_grid=core_grid,
        strategy=sharding_strategy,
        orientation=shard_orientation,
        use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
        tile_layout=shard_height_mul_of_32,
    )

    input_tensor_a = ttnn.from_torch(
        torch_input_tensor_a,
        dtype=input_a_dtype,
        layout=input_layout,
        device=device,
        memory_config=sharded_config,
    )

    input_tensor_b = ttnn.from_torch(
        torch_input_tensor_b,
        dtype=input_b_dtype,
        layout=input_layout,
        device=device,
        memory_config=sharded_config,
    )

    # Only the op itself is timed; conversion back to torch happens afterwards.
    start_time = start_measuring_time()
    output_tensor = ttnn.div(
        input_tensor_a, input_tensor_b, accurate_mode=accurate_mode, round_mode=round_mode, memory_config=sharded_config
    )
    e2e_perf = stop_measuring_time(start_time)
    output_tensor = ttnn.to_torch(output_tensor)

    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
    return [pcc, e2e_perf]
Loading
Loading