diff --git a/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_eltwise_block_shard_spec.py b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_eltwise_block_shard_spec.py
new file mode 100644
index 00000000000..88f9a193b17
--- /dev/null
+++ b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_eltwise_block_shard_spec.py
@@ -0,0 +1,276 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from loguru import logger
+import random
+import pytest
+import torch
+import ttnn
+
+from tests.ttnn.utils_for_testing import assert_with_pcc, check_with_pcc
+from tests.ttnn.python_api_testing.sweep_tests import ttnn_ops
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf
+
+Y, X = (8, 8)
+
+
+def run_tests(
+    input_shape,
+    dtype,
+    dlayout,
+    tensor_memory_layout,
+    buffer_type,
+    shard_grid,
+    shard_shape,
+    shard_orientation,
+    halo,
+    torch_op,
+    ttnn_op,
+    gen_infs,
+    device,
+):
+    random.seed(0)
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    if gen_infs:
+        torch_input_tensor_a = gen_rand_inf(input_shape, low=-100, high=100)
+    else:
+        torch_input_tensor_a = torch.Tensor(size=input_shape).uniform_(-50, 50).to(torch.bfloat16)
+
+    torch_output_tensor = torch_input_tensor_a  # the op under test is a nop, so the expected output is the input
+
+    shard_spec = ttnn.ShardSpec(shard_grid, shard_shape, shard_orientation, halo)
+    sharded_config = ttnn.MemoryConfig(tensor_memory_layout, buffer_type, shard_spec)
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=dtype,
+        layout=dlayout,
+        device=device,
+        memory_config=sharded_config,
+    )
+
+    output_tensor = input_tensor_a  # nop: round-trip the block-sharded tensor back to host unchanged
+    output_tensor = ttnn.to_torch(output_tensor)
+
+    [passed, message] = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
+    assert passed, f"PCC={message}"
+
+
+test_sweep_args = [
+    (
+        (256, 2, 5, 1536),  # Tensor shape
+        ttnn.bfloat16,  # Tensor dtype
+        ttnn.TILE_LAYOUT,  # Tensor layout
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [320, 192],  # shard shape
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (256, 2, 5, 1536),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [320, 192],
+        ttnn.ShardOrientation.ROW_MAJOR,
+        False,  # halo
+    ),
+    (
+        (256, 2, 5, 1536),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [320, 192],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 256, 2, 2304),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [64, 288],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 256, 2, 2304),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [64, 288],
+        ttnn.ShardOrientation.ROW_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 256, 2, 2304),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [64, 288],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (32, 4, 8, 768),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [128, 96],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (32, 4, 8, 768),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [128, 96],
+        ttnn.ShardOrientation.ROW_MAJOR,
+        False,  # halo
+    ),
+    (
+        (32, 4, 8, 768),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [128, 96],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 25, 160, 32),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 160],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 25, 160, 32),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 160],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 2, 1248, 32),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 1248],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 2, 1248, 32),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 1248],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 2, 1472, 32),
+        ttnn.bfloat16,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 1472],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (1, 2, 1472, 32),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [32, 1472],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+    (
+        (2, 1, 224, 128),
+        ttnn.bfloat8_b,
+        ttnn.TILE_LAYOUT,
+        ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+        ttnn.BufferType.L1,
+        ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))}),  # core grid
+        [128, 224],
+        ttnn.ShardOrientation.COL_MAJOR,
+        False,  # halo
+    ),
+]
+
+
+def nop(x, memory_config=None):  # identity op; memory_config is accepted to match ttnn op signatures
+    return x
+
+
+@pytest.mark.parametrize(
+    "input_shape, dtype, dlayout, tensor_memory_layout, buffer_type, shard_grid, shard_shape, shard_orientation, halo",
+    test_sweep_args,
+)
+def test_eltwise_nop(
+    input_shape,
+    dtype,
+    dlayout,
+    tensor_memory_layout,
+    buffer_type,
+    shard_grid,
+    shard_shape,
+    shard_orientation,
+    halo,
+    device,
+):
+    run_tests(
+        input_shape,
+        dtype,
+        dlayout,
+        tensor_memory_layout,
+        buffer_type,
+        shard_grid,
+        shard_shape,
+        shard_orientation,
+        halo,
+        nop,  # torch_op
+        nop,  # ttnn_op
+        False,  # gen_infs
+        device,
+    )
diff --git a/tests/ttnn/unit_tests/gtests/tensor/test_sharding_with_alignment.cpp b/tests/ttnn/unit_tests/gtests/tensor/test_sharding_with_alignment.cpp
index 466d602d990..14a421a30be 100644
--- a/tests/ttnn/unit_tests/gtests/tensor/test_sharding_with_alignment.cpp
+++ b/tests/ttnn/unit_tests/gtests/tensor/test_sharding_with_alignment.cpp
@@ -1049,6 +1049,51 @@ INSTANTIATE_TEST_SUITE_P(
             CreateShardedTensorWithAlignmentExpected{
                 .physical_shape = Size{28, 9}
             }
+        },
+        ////////////////////////////////////////////////////////////////////
+        // EXAMPLE 4: Some block sharding failures
+        ////////////////////////////////////////////////////////////////////
+        CreateShardedTensorWithAlignmentParams{
+            CreateShardedTensorWithAlignmentInputs{
+                .shape = SimpleShape{32, 4, 8, 768},
+                .data_type = DataType::BFLOAT16,
+                .page_config = PageConfig(Layout::TILE),
+                .memory_config =
+                    MemoryConfig{
+                        .memory_layout = TensorMemoryLayout::BLOCK_SHARDED,
+                        .buffer_type = BufferType::L1,
+                        .shard_spec = ShardSpec{
+                            num_cores_to_corerangeset(64, CoreCoord{8, 8}, /*row_wise=*/true),  // tt::div_up(32 * 4 * 8, 128) * tt::div_up(768, 96) = 8 * 8 = 64 cores
+                            {128, 96},
+                            ShardOrientation::ROW_MAJOR,
+                            false,
+                            ShardMode::PHYSICAL}
+                    }
+            },
+            CreateShardedTensorWithAlignmentExpected{
+                .physical_size = Size{1024, 768}
+            }
+        },
+        CreateShardedTensorWithAlignmentParams{
+            CreateShardedTensorWithAlignmentInputs{
+                .shape = SimpleShape{32, 4, 8, 768},
+                .data_type = DataType::BFLOAT16,
+                .page_config = PageConfig(Layout::TILE),
+                .memory_config =
+                    MemoryConfig{
+                        .memory_layout = TensorMemoryLayout::BLOCK_SHARDED,
+                        .buffer_type = BufferType::L1,
+                        .shard_spec = ShardSpec{
+                            num_cores_to_corerangeset(64, CoreCoord{8, 8}, /*row_wise=*/true),  // tt::div_up(32 * 4 * 8, 128) * tt::div_up(768, 96) = 8 * 8 = 64 cores
+                            {128, 96},
+                            ShardOrientation::COL_MAJOR,
+                            false,
+                            ShardMode::PHYSICAL}
+                    }
+            },
+            CreateShardedTensorWithAlignmentExpected{
+                .physical_size = Size{1024, 768}
+            }
         }
     )  // Values
     // clang-format on
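For quick triage outside the pytest harness, the failing round trip can also be driven by a standalone script. The sketch below is illustrative only and is not part of this patch: it reuses the same ttnn calls as the new test, and it assumes a single Wormhole device opened via ttnn.open_device (device_id=0 is a hypothetical choice).

# Standalone repro sketch (assumptions: ttnn APIs as used in the test above;
# device_id=0 is hypothetical). Round-trips a block-sharded tensor
# host -> L1 block-sharded device tensor -> host, with no op in between.
import torch
import ttnn

device = ttnn.open_device(device_id=0)

torch_input = torch.rand((32, 4, 8, 768), dtype=torch.bfloat16)

# 8x8 core grid: tt::div_up(32 * 4 * 8, 128) = 8 shard rows and
# tt::div_up(768, 96) = 8 shard columns, i.e. 64 cores in total.
shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 7))})
shard_spec = ttnn.ShardSpec(shard_grid, [128, 96], ttnn.ShardOrientation.ROW_MAJOR, False)
sharded_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.BLOCK_SHARDED, ttnn.BufferType.L1, shard_spec)

device_tensor = ttnn.from_torch(
    torch_input,
    dtype=ttnn.bfloat16,
    layout=ttnn.TILE_LAYOUT,
    device=device,
    memory_config=sharded_config,
)
round_tripped = ttnn.to_torch(device_tensor)

# A pure round trip should be lossless for bfloat16; any mismatch
# reproduces the sharding failure the test above captures.
print(torch.equal(torch_input, round_tripped))

ttnn.close_device(device)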