From 4b9b061b903ed0375629db10e6a1feacb349df46 Mon Sep 17 00:00:00 2001 From: amalbasaTT Date: Tue, 24 Dec 2024 14:28:20 +0000 Subject: [PATCH] #13776: Add hypot_bw_nonzero and div_bw_nonzero sweeps --- .github/workflows/ttnn-run-sweeps.yaml | 3 +- .../eltwise/binary/hypot/hypot_nonzero.py | 99 ------------ .../binary_backward/div_bw/div_bw_nonzero.py | 145 +++++++++++++++++ .../hypot_bw/hypot_bw_nonzero.py | 150 ++++++++++++++++++ .../unary_backward/rdiv_bw/rdiv_bw_nonzero.py | 16 +- .../test_backward_div_nonzero_bfloat8b.py | 116 ++++++++++++++ .../test_backward_hypot_nonzero_bfloat8b.py | 110 +++++++++++++ .../test_backward_rdiv_nonzero_bfloat8b.py | 103 ++++++++++++ 8 files changed, 633 insertions(+), 109 deletions(-) delete mode 100644 tests/sweep_framework/sweeps/eltwise/binary/hypot/hypot_nonzero.py create mode 100644 tests/sweep_framework/sweeps/eltwise/binary_backward/div_bw/div_bw_nonzero.py create mode 100644 tests/sweep_framework/sweeps/eltwise/binary_backward/hypot_bw/hypot_bw_nonzero.py create mode 100644 tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_div_nonzero_bfloat8b.py create mode 100644 tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_hypot_nonzero_bfloat8b.py create mode 100644 tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_rdiv_nonzero_bfloat8b.py diff --git a/.github/workflows/ttnn-run-sweeps.yaml b/.github/workflows/ttnn-run-sweeps.yaml index a7062de178d..f2837a837a7 100644 --- a/.github/workflows/ttnn-run-sweeps.yaml +++ b/.github/workflows/ttnn-run-sweeps.yaml @@ -279,7 +279,6 @@ on: - eltwise.binary.ne.ne_scalar_pytorch2 - eltwise.binary.ne.ne_forge - eltwise.binary.hypot.hypot - - eltwise.binary.hypot.hypot_nonzero - eltwise.binary.xlogy.xlogy - eltwise.binary_backward.ldexp_bw.ldexp_bw - eltwise.binary_backward.logaddexp_bw @@ -288,10 +287,12 @@ on: - eltwise.binary_backward.subalpha_bw.subalpha_bw - eltwise.binary_backward.xlogy_bw.xlogy_bw - eltwise.binary_backward.hypot_bw.hypot_bw + - eltwise.binary_backward.hypot_bw.hypot_bw_nonzero - eltwise.binary_backward.add_bw.add_bw - eltwise.binary_backward.sub_bw.sub_bw - eltwise.binary_backward.mul_bw.mul_bw - eltwise.binary_backward.div_bw.div_bw + - eltwise.binary_backward.div_bw.div_bw_nonzero - eltwise.binary_backward.fmod_bw.fmod_bw - eltwise.binary_backward.remainder_bw.remainder_bw - eltwise.binary_backward.rsub_bw.rsub_bw diff --git a/tests/sweep_framework/sweeps/eltwise/binary/hypot/hypot_nonzero.py b/tests/sweep_framework/sweeps/eltwise/binary/hypot/hypot_nonzero.py deleted file mode 100644 index 0c1715e6b70..00000000000 --- a/tests/sweep_framework/sweeps/eltwise/binary/hypot/hypot_nonzero.py +++ /dev/null @@ -1,99 +0,0 @@ -# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. - -# SPDX-License-Identifier: Apache-2.0 - -from typing import Optional, Tuple -from functools import partial - -import torch -import random -import ttnn -from tests.sweep_framework.sweep_utils.utils import gen_shapes -from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt - -from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time -from models.utility_functions import torch_random - - -# Parameters provided to the test vector generator are defined here. -# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values. 
-# Each suite has a key name (in this case "suite_1") which will associate the test vectors to this specific suite of inputs. -# Developers can create their own generator functions and pass them to the parameters as inputs. -parameters = { - "nightly": { - "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 16) - + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 16) - + gen_shapes([1, 1], [256, 256], [1, 1], 16), - "input_a_dtype": [ttnn.bfloat8_b], - "input_b_dtype": [ttnn.bfloat8_b], - "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT], - "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], - "input_b_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], - }, -} - - -# Invalidate vector is called during the generation phase where each vector will be passed in. -# If invalidated, the vector will still be stored but will be skipped. -# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. -def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: - if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT or test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT: - return True, "Row Major layout is not supported" - return False, None - - -# This is the run instructions for the test, defined by the developer. -# The run function must take the above-defined parameters as inputs. -# The runner will call this run function with each test vector, and the returned results from this function will be stored. -# If you defined a device_mesh_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra. -def run( - input_shape, - input_a_dtype, - input_b_dtype, - input_layout, - input_a_memory_config, - input_b_memory_config, - *, - device, -) -> list: - torch.manual_seed(0) - - torch_input_tensor_a = gen_func_with_cast_tt( - partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype - )(input_shape) - torch_input_tensor_b = gen_func_with_cast_tt( - partial(torch_random, low=-100, high=100, dtype=torch.float32), input_b_dtype - )(input_shape) - - while torch.any(torch_input_tensor_a == 0.0): - torch_input_tensor_a = torch.where(torch_input_tensor_a == 0.0, random.uniform(-100, 100), torch_input_tensor_a) - while torch.any(torch_input_tensor_b == 0.0): - torch_input_tensor_b = torch.where(torch_input_tensor_b == 0.0, random.uniform(-100, 100), torch_input_tensor_b) - - assert not torch.any(torch_input_tensor_a == 0) - assert not torch.any(torch_input_tensor_b == 0) - - golden_function = ttnn.get_golden_function(ttnn.hypot) - torch_output_tensor = golden_function(torch_input_tensor_a, torch_input_tensor_b) - - input_tensor_a = ttnn.from_torch( - torch_input_tensor_a, - dtype=input_a_dtype, - layout=input_layout, - device=device, - memory_config=input_a_memory_config, - ) - input_tensor_b = ttnn.from_torch( - torch_input_tensor_b, - dtype=input_b_dtype, - layout=input_layout, - device=device, - memory_config=input_b_memory_config, - ) - start_time = start_measuring_time() - result = ttnn.hypot(input_tensor_a, input_tensor_b) - e2e_perf = stop_measuring_time(start_time) - - output_tensor = ttnn.to_torch(result) - - return [check_with_pcc(torch_output_tensor, output_tensor, 0.999), e2e_perf] diff --git a/tests/sweep_framework/sweeps/eltwise/binary_backward/div_bw/div_bw_nonzero.py b/tests/sweep_framework/sweeps/eltwise/binary_backward/div_bw/div_bw_nonzero.py new file mode 100644 
index 00000000000..41039018d33
--- /dev/null
+++ b/tests/sweep_framework/sweeps/eltwise/binary_backward/div_bw/div_bw_nonzero.py
@@ -0,0 +1,145 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+import random
+
+import torch
+import ttnn
+
+from tests.sweep_framework.sweep_utils.utils import gen_shapes, gen_rand_exclude_range, sanitize_shape_rm
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors with this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 8)
+        + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 8)
+        + gen_shapes([1, 1], [256, 256], [1, 1], 8),
+        "exclude_range": [[-1, 1]],
+        "round_mode": [None, "floor", "trunc"],
+        "grad_dtype": [ttnn.bfloat8_b],
+        "input_a_dtype": [ttnn.bfloat8_b],
+        "input_b_dtype": [ttnn.bfloat8_b],
+        "input_layout": [ttnn.TILE_LAYOUT],
+        "grad_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "input_b_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and (
+        test_vector["grad_dtype"] == ttnn.bfloat8_b
+        or test_vector["input_a_dtype"] == ttnn.bfloat8_b
+        or test_vector["input_b_dtype"] == ttnn.bfloat8_b
+    ):
+        return True, "bfloat8_b is only supported on tiled layout"
+    if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
+        return True, "Operation requires tensors to be in Tile layout when working with non-sharded input tensors"
+    return False, None
+
+
+# These are the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_shape,
+    exclude_range,
+    round_mode,
+    grad_dtype,
+    input_a_dtype,
+    input_b_dtype,
+    input_layout,
+    grad_memory_config,
+    input_a_memory_config,
+    input_b_memory_config,
+    output_memory_config,
+    *,
+    device,
+) -> list:
+    torch.manual_seed(0)
+
+    if input_layout == ttnn.ROW_MAJOR_LAYOUT:
+        input_shape = sanitize_shape_rm(input_shape)
+
+    torch_grad_tensor = gen_func_with_cast_tt(
+        partial(gen_rand_exclude_range, excluderange=exclude_range, low=-100, high=100), grad_dtype
+    )(input_shape)
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(gen_rand_exclude_range, excluderange=exclude_range, low=-100, high=100), input_a_dtype
+    )(input_shape)
+    torch_input_tensor_b = gen_func_with_cast_tt(
+        partial(gen_rand_exclude_range, excluderange=exclude_range, low=-100, high=100), input_b_dtype
+    )(input_shape)
+    torch_input_tensor_a.requires_grad = True
+    torch_input_tensor_b.requires_grad = True
+
+    assert not torch.any(torch_grad_tensor == 0.0)
+    assert not torch.any(torch_input_tensor_a == 0.0)
+    assert not torch.any(torch_input_tensor_b == 0.0)
+
+    golden_function = ttnn.get_golden_function(ttnn.div_bw)
+    torch_output_tensors = golden_function(
+        torch_grad_tensor, torch_input_tensor_a, torch_input_tensor_b, round_mode if round_mode != "None" else None
+    )
+
+    grad_tensor = ttnn.from_torch(
+        torch_grad_tensor,
+        dtype=grad_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=grad_memory_config,
+    )
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=input_a_memory_config,
+    )
+    input_tensor_b = ttnn.from_torch(
+        torch_input_tensor_b,
+        dtype=input_b_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=input_b_memory_config,
+    )
+
+    start_time = start_measuring_time()
+    output_tensors = ttnn.div_bw(
+        grad_tensor, input_tensor_a, input_tensor_b, round_mode=round_mode, memory_config=output_memory_config
+    )
+    e2e_perf = stop_measuring_time(start_time)
+
+    passed = []
+    output_strings = []
+    for torch_output_tensor, output_tensor in zip(torch_output_tensors, output_tensors):
+        passed_, output_string_ = check_with_pcc(torch_output_tensor, ttnn.to_torch(output_tensor), 0.999)
+        passed.append(passed_)
+        output_strings.append(output_string_)
+
+    return [(all(passed), ", ".join(output_strings)), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/eltwise/binary_backward/hypot_bw/hypot_bw_nonzero.py b/tests/sweep_framework/sweeps/eltwise/binary_backward/hypot_bw/hypot_bw_nonzero.py
new file mode 100644
index 00000000000..65eb0a4c9f9
--- /dev/null
+++ b/tests/sweep_framework/sweeps/eltwise/binary_backward/hypot_bw/hypot_bw_nonzero.py
@@ -0,0 +1,150 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+import random
+
+import torch
+import ttnn
+
+from tests.sweep_framework.sweep_utils.utils import gen_shapes, gen_rand_exclude_range, sanitize_shape_rm
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors with this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 8)
+        + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 8)
+        + gen_shapes([1, 1], [256, 256], [1, 1], 8),
+        "exclude_range": [[-1, 1]],
+        "grad_dtype": [ttnn.bfloat8_b],
+        "input_a_dtype": [ttnn.bfloat8_b],
+        "input_b_dtype": [ttnn.bfloat8_b],
+        "input_layout": [ttnn.TILE_LAYOUT],
+        "grad_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "input_b_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+    },
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and (
+        test_vector["grad_dtype"] == ttnn.bfloat8_b
+        or test_vector["input_a_dtype"] == ttnn.bfloat8_b
+        or test_vector["input_b_dtype"] == ttnn.bfloat8_b
+    ):
+        return True, "bfloat8_b is only supported on tiled layout"
+    if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT:
+        return True, "Operation requires tensors to be in Tile layout when working with non-sharded input tensors"
+    return False, None
+
+
+# These are the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_shape,
+    exclude_range,
+    grad_dtype,
+    input_a_dtype,
+    input_b_dtype,
+    input_layout,
+    grad_memory_config,
+    input_a_memory_config,
+    input_b_memory_config,
+    output_memory_config,
+    *,
+    device,
+) -> list:
+    torch.manual_seed(0)
+
+    if input_layout == ttnn.ROW_MAJOR_LAYOUT:
+        input_shape = sanitize_shape_rm(input_shape)
+
+    torch_grad_tensor = gen_func_with_cast_tt(
+        partial(gen_rand_exclude_range, excluderange=exclude_range, low=-100, high=100), grad_dtype
+    )(input_shape)
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(gen_rand_exclude_range, excluderange=exclude_range, low=-100, high=100), input_a_dtype
+    )(input_shape)
+    torch_input_tensor_b = gen_func_with_cast_tt(
+        partial(gen_rand_exclude_range, excluderange=exclude_range, low=-100, high=100), input_b_dtype
+    )(input_shape)
+    torch_input_tensor_a.requires_grad = True
+    torch_input_tensor_b.requires_grad = True
+
+    assert not torch.any(torch_grad_tensor == 0.0)
+    assert not torch.any(torch_input_tensor_a == 0.0)
+    assert not torch.any(torch_input_tensor_b == 0.0)
+
+    golden_function = ttnn.get_golden_function(ttnn.hypot_bw)
+    torch_output_tensors = golden_function(torch_grad_tensor, torch_input_tensor_a, torch_input_tensor_b)
+
+    grad_tensor = ttnn.from_torch(
+        torch_grad_tensor,
+        dtype=grad_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=grad_memory_config,
+    )
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=input_a_memory_config,
+    )
+    input_tensor_b = ttnn.from_torch(
+        torch_input_tensor_b,
+        dtype=input_b_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=input_b_memory_config,
+    )
+
+    start_time = start_measuring_time()
+    output_tensors = ttnn.hypot_bw(grad_tensor, input_tensor_a, input_tensor_b, memory_config=output_memory_config)
+    e2e_perf = stop_measuring_time(start_time)
+
+    passed = []
+    output_strings = []
+    for torch_output_tensor, output_tensor in zip(torch_output_tensors, output_tensors):
+        passed_, output_string_ = check_with_pcc(torch_output_tensor, ttnn.to_torch(output_tensor), 0.999)
+        passed.append(passed_)
+        output_strings.append(output_string_)
+
+    return [(all(passed), ", ".join(output_strings)), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/eltwise/unary_backward/rdiv_bw/rdiv_bw_nonzero.py b/tests/sweep_framework/sweeps/eltwise/unary_backward/rdiv_bw/rdiv_bw_nonzero.py
index 06d5448a809..5cdb8013fb5 100644
--- a/tests/sweep_framework/sweeps/eltwise/unary_backward/rdiv_bw/rdiv_bw_nonzero.py
+++ b/tests/sweep_framework/sweeps/eltwise/unary_backward/rdiv_bw/rdiv_bw_nonzero.py
@@ -25,6 +25,7 @@
         "input_shape": gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 8)
         + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 8)
         + gen_shapes([1, 1], [256, 256], [1, 1], 8),
+        "exclude_range": [[-1, 1]],
         "grad_dtype": [ttnn.bfloat8_b],
         "input_a_dtype": [ttnn.bfloat8_b],
         "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
@@ -54,6 +55,7 @@ def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
 # If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
 def run(
     input_shape,
+    exclude_range,
     grad_dtype,
     input_a_dtype,
     input_layout,
@@ -69,20 +71,15 @@ def run(
         input_shape = sanitize_shape_rm(input_shape)
 
     torch_grad_tensor = gen_func_with_cast_tt(
-        partial(torch_random, low=-100, high=100, dtype=torch.float32), grad_dtype
+        partial(gen_rand_exclude_range, excluderange=exclude_range, low=-100, high=100), grad_dtype
     )(input_shape)
     torch_input_tensor_a = gen_func_with_cast_tt(
-        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+        partial(gen_rand_exclude_range, excluderange=exclude_range, low=-100, high=100), input_a_dtype
     )(input_shape)
     torch_input_tensor_a.requires_grad = True
 
     factor = torch.tensor(1, dtype=torch.bfloat16).uniform_(-100, 100).item()
-
-    while torch.any(torch_grad_tensor == 0.0):
-        torch_grad_tensor = torch.where(torch_grad_tensor == 0.0, random.uniform(-100, 100), torch_grad_tensor)
-    while torch.any(torch_input_tensor_a == 0.0):
-        torch_input_tensor_a = torch.where(torch_input_tensor_a == 0.0, random.uniform(-100, 100), torch_input_tensor_a)
-    while factor == 0.0:
+    while (factor > exclude_range[0]) and (factor < exclude_range[1]):
+        factor = torch.tensor(1, dtype=torch.bfloat16).uniform_(-100, 100).item()
 
     assert not torch.any(torch_grad_tensor == 0)
@@ -110,7 +107,8 @@ def run(
     start_time = start_measuring_time()
     output_tensor = ttnn.rdiv_bw(grad_tensor, input_tensor_a, scalar=factor, memory_config=output_memory_config)[0]
-    output_tensor = ttnn.to_torch(output_tensor)
     e2e_perf = stop_measuring_time(start_time)
 
+    output_tensor = ttnn.to_torch(output_tensor)
+
     return [check_with_pcc(torch_output_tensor, output_tensor, 0.999), e2e_perf]
diff --git a/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_div_nonzero_bfloat8b.py b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_div_nonzero_bfloat8b.py
new file mode 100644
index 00000000000..053258c4060
--- /dev/null
+++ b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_div_nonzero_bfloat8b.py
@@ -0,0 +1,116 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+ +# SPDX-License-Identifier: Apache-2.0 + +from loguru import logger +import random +import pytest +import torch +import ttnn +import traceback +from itertools import product +from functools import partial + +from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt +from tests.sweep_framework.sweep_utils.utils import gen_rand_exclude_range + +from tests.ttnn.utils_for_testing import assert_with_pcc +from tests.ttnn.python_api_testing.sweep_tests import ttnn_ops + + +def run_backward_div_tests( + input_shape, + round_mode, + exclude_range, + dtype, + dlayout, + in_mem_config, + output_mem_config, + data_seed, + device, +): + torch.manual_seed(data_seed) + + x = gen_func_with_cast_tt( + partial(gen_rand_exclude_range, excluderange=exclude_range[0], low=-100, high=100), dtype[0] + )(input_shape[0]) + y = gen_func_with_cast_tt( + partial(gen_rand_exclude_range, excluderange=exclude_range[0], low=-100, high=100), dtype[1] + )(input_shape[0]) + z = gen_func_with_cast_tt( + partial(gen_rand_exclude_range, excluderange=exclude_range[0], low=-100, high=100), dtype[2] + )(input_shape[0]) + + y.requires_grad = True + z.requires_grad = True + + try: + # get ref result + golden_function = ttnn.get_golden_function(ttnn.div_bw) + ref_value = golden_function(x, y, z, round_mode if round_mode != "None" else None) + + tt_x = ttnn_ops.setup_ttnn_tensor(x, device, dlayout[0], in_mem_config[0], dtype[0]) + tt_y = ttnn_ops.setup_ttnn_tensor(y, device, dlayout[0], in_mem_config[1], dtype[1]) + tt_z = ttnn_ops.setup_ttnn_tensor(z, device, dlayout[0], in_mem_config[2], dtype[2]) + + tt_result = ttnn.div_bw(tt_x, tt_y, tt_z, round_mode=round_mode, memory_config=output_mem_config) + tt_result = [ + ttnn_ops.ttnn_tensor_to_torch(tt_result[0]), + ttnn_ops.ttnn_tensor_to_torch(tt_result[1]), + ] + + except Exception as e: + logger.warning(f"Test execution crashed: {e}") + print(traceback.format_exc()) + raise e + + for i in range(2): + assert len(tt_result[i].shape) == len(ref_value[i].shape) + assert tt_result[i].shape == ref_value[i].shape + assert_with_pcc(ref_value[i], tt_result[i], 0.99) + + +test_sweep_args = [ + ( + [[3, 107, 11]], + None, + [[-1, 1]], + [ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.bfloat8_b], + [ttnn.TILE_LAYOUT], + [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG], + ttnn.DRAM_MEMORY_CONFIG, + 0, + ), + ( + [[107, 11]], + None, + [[-1, 1]], + [ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.bfloat8_b], + [ttnn.TILE_LAYOUT], + [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG], + ttnn.DRAM_MEMORY_CONFIG, + 0, + ), + ( + [[50, 10]], + None, + [[-1, 1]], + [ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.bfloat8_b], + [ttnn.TILE_LAYOUT], + [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG], + ttnn.DRAM_MEMORY_CONFIG, + 0, + ), +] + + +@pytest.mark.parametrize( + "input_shape, round_mode, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed", + (test_sweep_args), +) +def test_backward_div( + input_shape, round_mode, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed, device +): + run_backward_div_tests( + input_shape, round_mode, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed, device + ) diff --git a/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_hypot_nonzero_bfloat8b.py b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_hypot_nonzero_bfloat8b.py new file mode 100644 index 
00000000000..74502e89bbe --- /dev/null +++ b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_hypot_nonzero_bfloat8b.py @@ -0,0 +1,110 @@ +# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. + +# SPDX-License-Identifier: Apache-2.0 + +from loguru import logger +import random +import pytest +import torch +import ttnn +import traceback +from itertools import product +from functools import partial + +from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt +from tests.sweep_framework.sweep_utils.utils import gen_rand_exclude_range + +from tests.ttnn.utils_for_testing import assert_with_pcc +from tests.ttnn.python_api_testing.sweep_tests import ttnn_ops + + +def run_backward_hypot_tests( + input_shape, + exclude_range, + dtype, + dlayout, + in_mem_config, + output_mem_config, + data_seed, + device, +): + torch.manual_seed(data_seed) + + x = gen_func_with_cast_tt( + partial(gen_rand_exclude_range, excluderange=exclude_range[0], low=-100, high=100), dtype[0] + )(input_shape[0]) + y = gen_func_with_cast_tt( + partial(gen_rand_exclude_range, excluderange=exclude_range[0], low=-100, high=100), dtype[1] + )(input_shape[0]) + z = gen_func_with_cast_tt( + partial(gen_rand_exclude_range, excluderange=exclude_range[0], low=-100, high=100), dtype[2] + )(input_shape[0]) + + y.requires_grad = True + z.requires_grad = True + + try: + # get ref result + golden_function = ttnn.get_golden_function(ttnn.hypot_bw) + ref_value = golden_function(x, y, z) + + tt_x = ttnn_ops.setup_ttnn_tensor(x, device, dlayout[0], in_mem_config[0], dtype[0]) + tt_y = ttnn_ops.setup_ttnn_tensor(y, device, dlayout[0], in_mem_config[1], dtype[1]) + tt_z = ttnn_ops.setup_ttnn_tensor(z, device, dlayout[0], in_mem_config[2], dtype[2]) + + tt_result = ttnn.hypot_bw(tt_x, tt_y, tt_z, memory_config=output_mem_config) + tt_result = [ + ttnn_ops.ttnn_tensor_to_torch(tt_result[0]), + ttnn_ops.ttnn_tensor_to_torch(tt_result[1]), + ] + + except Exception as e: + logger.warning(f"Test execution crashed: {e}") + print(traceback.format_exc()) + raise e + + for i in range(2): + assert len(tt_result[i].shape) == len(ref_value[i].shape) + assert tt_result[i].shape == ref_value[i].shape + assert_with_pcc(ref_value[i], tt_result[i], 0.99) + + +test_sweep_args = [ + ( + [[4, 226, 2]], + [[-1, 1]], + [ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.bfloat8_b], + [ttnn.TILE_LAYOUT], + [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG], + ttnn.DRAM_MEMORY_CONFIG, + 0, + ), + ( + [[2, 3, 243, 9]], + [[-1, 1]], + [ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.bfloat8_b], + [ttnn.TILE_LAYOUT], + [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG], + ttnn.DRAM_MEMORY_CONFIG, + 0, + ), + ( + [[8, 141, 15]], + [[-1, 1]], + [ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.bfloat8_b], + [ttnn.TILE_LAYOUT], + [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG], + ttnn.DRAM_MEMORY_CONFIG, + 0, + ), +] + + +@pytest.mark.parametrize( + "input_shape, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed", + (test_sweep_args), +) +def test_backward_hypot(input_shape, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed, device): + run_backward_hypot_tests( + input_shape, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed, device + ) diff --git a/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_rdiv_nonzero_bfloat8b.py 
b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_rdiv_nonzero_bfloat8b.py
new file mode 100644
index 00000000000..81f59e59e9b
--- /dev/null
+++ b/tests/ttnn/python_api_testing/non_working_unit_tests/wormhole/test_backward_rdiv_nonzero_bfloat8b.py
@@ -0,0 +1,103 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from loguru import logger
+import random
+import pytest
+import torch
+import ttnn
+import traceback
+from itertools import product
+from functools import partial
+
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
+from tests.sweep_framework.sweep_utils.utils import gen_rand_exclude_range
+
+from tests.ttnn.utils_for_testing import assert_with_pcc
+from tests.ttnn.python_api_testing.sweep_tests import ttnn_ops
+
+
+def run_backward_rdiv_tests(
+    input_shape,
+    exclude_range,
+    dtype,
+    dlayout,
+    in_mem_config,
+    output_mem_config,
+    data_seed,
+    device,
+):
+    torch.manual_seed(data_seed)
+
+    x = gen_func_with_cast_tt(
+        partial(gen_rand_exclude_range, excluderange=exclude_range[0], low=-100, high=100), dtype[0]
+    )(input_shape[0])
+    y = gen_func_with_cast_tt(
+        partial(gen_rand_exclude_range, excluderange=exclude_range[0], low=-100, high=100), dtype[1]
+    )(input_shape[0])
+
+    # Resample the scalar until it lies outside the excluded interval.
+    factor = torch.tensor(1, dtype=torch.bfloat16).uniform_(-100, 100).item()
+    while (factor > exclude_range[0][0]) and (factor < exclude_range[0][1]):
+        factor = torch.tensor(1, dtype=torch.bfloat16).uniform_(-100, 100).item()
+
+    y.requires_grad = True
+
+    try:
+        # get ref result
+        golden_function = ttnn.get_golden_function(ttnn.rdiv_bw)
+        ref_value = golden_function(x, y, factor)[0]
+
+        tt_x = ttnn_ops.setup_ttnn_tensor(x, device, dlayout[0], in_mem_config[0], dtype[0])
+        tt_y = ttnn_ops.setup_ttnn_tensor(y, device, dlayout[0], in_mem_config[1], dtype[1])
+
+        tt_result = ttnn.rdiv_bw(tt_x, tt_y, factor, memory_config=output_mem_config)[0]
+        tt_result = ttnn_ops.ttnn_tensor_to_torch(tt_result)
+
+    except Exception as e:
+        logger.warning(f"Test execution crashed: {e}")
+        print(traceback.format_exc())
+        raise e
+
+    assert len(tt_result.shape) == len(ref_value.shape)
+    assert tt_result.shape == ref_value.shape
+    assert_with_pcc(ref_value, tt_result, 0.99)
+
+
+test_sweep_args = [
+    (
+        [[9, 134, 12]],
+        [[-1, 1]],
+        [ttnn.bfloat8_b, ttnn.bfloat8_b],
+        [ttnn.TILE_LAYOUT],
+        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
+        ttnn.DRAM_MEMORY_CONFIG,
+        0,
+    ),
+    (
+        [[1, 54, 14]],
+        [[-1, 1]],
+        [ttnn.bfloat8_b, ttnn.bfloat8_b],
+        [ttnn.TILE_LAYOUT],
+        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
+        ttnn.DRAM_MEMORY_CONFIG,
+        0,
+    ),
+    (
+        [[223, 3]],
+        [[-1, 1]],
+        [ttnn.bfloat8_b, ttnn.bfloat8_b],
+        [ttnn.TILE_LAYOUT],
+        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
+        ttnn.DRAM_MEMORY_CONFIG,
+        0,
+    ),
+]
+
+
+@pytest.mark.parametrize(
+    "input_shape, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed",
+    (test_sweep_args),
+)
+def test_backward_rdiv(input_shape, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed, device):
+    run_backward_rdiv_tests(input_shape, exclude_range, dtype, dlayout, in_mem_config, out_mem_config, data_seed, device)
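
All of the new sweeps and unit tests in this patch share one generation pattern: instead of drawing values with torch_random and then patching zeros one at a time (the approach deleted from hypot_nonzero.py and rdiv_bw_nonzero.py), they compose gen_func_with_cast_tt with gen_rand_exclude_range so every element is sampled outside [-1, 1] from the start. Excluding the whole unit interval rather than just zero presumably leaves headroom for bfloat8_b's shared-exponent (block-float) quantization, which can otherwise flush small magnitudes to zero after casting. The sketch below is a minimal stand-alone illustration of that rejection-sampling idea; the real helper lives in tests/sweep_framework/sweep_utils/utils and its exact implementation may differ, so the function here is a hypothetical stand-in.

import torch


def rand_exclude_range(size, excluderange=None, low=-100, high=100):
    # Hypothetical stand-in for gen_rand_exclude_range: sample uniformly from
    # [low, high] and resample any entries that land inside the open interval
    # (excluderange[0], excluderange[1]) until none remain.
    t = torch.empty(size, dtype=torch.float32).uniform_(low, high)
    if excluderange is not None:
        lo, hi = excluderange
        mask = (t > lo) & (t < hi)
        while torch.any(mask):
            t[mask] = torch.empty(int(mask.sum()), dtype=torch.float32).uniform_(low, high)
            mask = (t > lo) & (t < hi)
    return t


x = rand_exclude_range([2, 32, 32], excluderange=[-1, 1])
assert not torch.any((x > -1) & (x < 1))  # every element has magnitude >= 1

Because the guarantee holds element-wise at generation time, the run functions can replace the old resampling while-loops with plain assertions, and the same partial(...) can be reused for grad, input_a, and input_b tensors.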