diff --git a/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_eltwise_block_shard_spec.py b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_eltwise_block_shard_spec.py
new file mode 100644
index 00000000000..88f9a193b17
--- /dev/null
+++ b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_eltwise_block_shard_spec.py
@@ -0,0 +1,276 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from loguru import logger
+import random
+import pytest
+import torch
+import ttnn
+
+from tests.ttnn.utils_for_testing import assert_with_pcc, check_with_pcc
+from tests.ttnn.python_api_testing.sweep_tests import ttnn_ops
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf
+
+Y, X = (8, 8)
+
+
+def run_tests(
+    input_shape,
+    dtype,
+    dlayout,
+    tensor_memory_layout,
+    buffer_type,
+    shard_grid,
+    shard_shape,
+    shard_orientation,
+    halo,
+    torch_op,
+    ttnn_op,
+    gen_infs,
+    device,
+):
+    random.seed(0)
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    if gen_infs:
+        torch_input_tensor_a = gen_rand_inf(input_shape, low=-100, high=100)
+    else:
+        torch_input_tensor_a = torch.Tensor(size=input_shape).uniform_(-50, 50).to(torch.bfloat16)
+
+    torch_output_tensor = torch_input_tensor_a  # the op under test is a nop, so the expected output is the input
+
+    shard_spec = ttnn.ShardSpec(shard_grid, shard_shape, shard_orientation, halo)
+    sharded_config = ttnn.MemoryConfig(tensor_memory_layout, buffer_type, shard_spec)
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=dtype,
+        layout=dlayout,
+        device=device,
+        memory_config=sharded_config,
+    )
+
+    output_tensor = input_tensor_a  # nop: round-trip the block-sharded tensor back to host unchanged
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    [passed, message] = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
+    assert passed, f"PCC={message}"
+
+
+test_sweep_args = [
+    (
+        (256, 2, 5, 1536),  # Tensor shape
+        ttnn.bfloat16,  # Tensor dtype
+        ttnn.TILE_LAYOUT,  # Tensor layout
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [320, 192],  # shard shape
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (256, 2, 5, 1536),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [320, 192],
+        ttnn.ShardOrientation.ROW_MAJOR,
+        False,  # halo
+    ),
+    (
+        (256, 2, 5, 1536),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [320, 192],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 256, 2, 2304),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [64, 288],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 256, 2, 2304),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [64, 288],
+        ttnn.ShardOrientation.ROW_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 256, 2, 2304),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [64, 288],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (32, 4, 8, 768),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [128, 96],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (32, 4, 8, 768),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [128, 96],
+        ttnn.ShardOrientation.ROW_MAJOR,
+        False,  # halo
+    ),
+    (
+        (32, 4, 8, 768),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [128, 96],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 25, 160, 32),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 160],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 25, 160, 32),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 160],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 2, 1248, 32),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 1248],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 2, 1248, 32),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 1248],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 2, 1472, 32),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 1472],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 2, 1472, 32),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 1472],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (2, 1, 224, 128),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [128, 224],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+]
+
+
+def nop(x, memory_config=None):  # identity op; memory_config is accepted to match ttnn op signatures
+    return x
+
+
+@pytest.mark.parametrize(
+    "input_shape, dtype, dlayout, tensor_memory_layout, buffer_type, shard_grid, shard_shape, shard_orientation, halo",
+    test_sweep_args,
+)
+def test_eltwise_nop(
+    input_shape,
+    dtype,
+    dlayout,
+    tensor_memory_layout,
+    buffer_type,
+    shard_grid,
+    shard_shape,
+    shard_orientation,
+    halo,
+    device,
+):
+    run_tests(
+        input_shape,
+        dtype,
+        dlayout,
+        tensor_memory_layout,
+        buffer_type,
+        shard_grid,
+        shard_shape,
+        shard_orientation,
+        halo,
+        nop,  # torch_op
+        nop,  # ttnn_op
+        False,  # gen_infs
+        device,
+    )
diff --git a/tests/ttnn/unit_tests/gtests/tensor/test_sharding_with_alignment.cpp b/tests/ttnn/unit_tests/gtests/tensor/test_sharding_with_alignment.cpp
index 466d602d990..14a421a30be 100644
--- a/tests/ttnn/unit_tests/gtests/tensor/test_sharding_with_alignment.cpp
+++ b/tests/ttnn/unit_tests/gtests/tensor/test_sharding_with_alignment.cpp
@@ -1049,6 +1049,51 @@ INSTANTIATE_TEST_SUITE_P(
             CreateShardedTensorWithAlignmentExpected{
                 .physical_shape = Size{28, 9}
             }
+        },
+        ////////////////////////////////////////////////////////////////////
+        // EXAMPLE 4: Some block sharding failures
+        ////////////////////////////////////////////////////////////////////
+        CreateShardedTensorWithAlignmentParams{
+            CreateShardedTensorWithAlignmentInputs{
+                .shape = SimpleShape{32, 4, 8, 768},
+                .data_type = DataType::BFLOAT16,
+                .page_config = PageConfig(Layout::TILE),
+                .memory_config =
+                    MemoryConfig{
+                        .memory_layout = TensorMemoryLayout::BLOCK_SHARDED,
+                        .buffer_type = BufferType::L1,
+                        .shard_spec = ShardSpec{
+                            num_cores_to_corerangeset(64, CoreCoord{8, 8}, /*row_wise=*/true),  // tt::div_up(32 * 4 * 8, 128) * tt::div_up(768, 96) = 8 * 8 = 64 cores
+                            {128, 96},
+                            ShardOrientation::ROW_MAJOR,
+                            false,
+                            ShardMode::PHYSICAL}
+                    }
+            },
+            CreateShardedTensorWithAlignmentExpected{
+                .physical_size = Size{1024, 768}
+            }
+        },
+        CreateShardedTensorWithAlignmentParams{
+            CreateShardedTensorWithAlignmentInputs{
+                .shape = SimpleShape{32, 4, 8, 768},
+                .data_type = DataType::BFLOAT16,
+                .page_config = PageConfig(Layout::TILE),
+                .memory_config =
+                    MemoryConfig{
+                        .memory_layout = TensorMemoryLayout::BLOCK_SHARDED,
+                        .buffer_type = BufferType::L1,
+                        .shard_spec = ShardSpec{
+                            num_cores_to_corerangeset(64, CoreCoord{8, 8}, /*row_wise=*/true),  // tt::div_up(32 * 4 * 8, 128) * tt::div_up(768, 96) = 8 * 8 = 64 cores
+                            {128, 96},
+                            ShardOrientation::COL_MAJOR,
+                            false,
+                            ShardMode::PHYSICAL}
+                    }
+            },
+            CreateShardedTensorWithAlignmentExpected{
+                .physical_size = Size{1024, 768}
+            }
         }
     )  // Values
     // clang-format on
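For quick triage outside the pytest harness, the failing round trip can also be driven by a standalone script. The sketch below is illustrative only and is not part of this patch: it reuses the same ttnn calls as the new test, and it assumes a single Wormhole device opened via ttnn.open_device (device_id=0 is a hypothetical choice).

# Standalone repro sketch (assumptions: ttnn APIs as used in the test above;
# device_id=0 is hypothetical). Round-trips a block-sharded tensor
# host -> L1 block-sharded device tensor -> host, with no op in between.
import torch
import ttnn

device = ttnn.open_device(device_id=0)

torch_input = torch.rand((32, 4, 8, 768), dtype=torch.bfloat16)

# 8x8 core grid: tt::div_up(32 * 4 * 8, 128) = 8 shard rows and
# tt::div_up(768, 96) = 8 shard columns, i.e. 64 cores in total.
shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))})
shard_spec = ttnn.ShardSpec(shard_grid, [128, 96], ttnn.ShardOrientation.ROW_MAJOR, False)
sharded_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.BLOCK_SHARDED, ttnn.BufferType.L1, shard_spec)

device_tensor = ttnn.from_torch(
    torch_input,
    dtype=ttnn.bfloat16,
    layout=ttnn.TILE_LAYOUT,
    device=device,
    memory_config=sharded_config,
)
round_tripped = ttnn.to_torch(device_tensor)

# A pure round trip should be lossless for bfloat16; any mismatch
# reproduces the sharding failure the test above captures.
print(torch.equal(torch_input, round_tripped))

ttnn.close_device(device)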