diff --git a/tests/ttnn/profiling/ops_for_profiling.py b/tests/ttnn/profiling/ops_for_profiling.py
index aa9d454dfd2..070dfce16d9 100644
--- a/tests/ttnn/profiling/ops_for_profiling.py
+++ b/tests/ttnn/profiling/ops_for_profiling.py
@@ -82,6 +82,16 @@ def bcast_hw_shape_func_11(input_shape):
     return input_shape, input_shape_1
 
 
+def bcast_h_shape_func_1(input_shape):
+    input_shape_1 = [input_shape[-4], input_shape[-3], 1, input_shape[-1]]
+    return input_shape, input_shape_1
+
+
+def bcast_w_shape_func_1(input_shape):
+    input_shape_1 = [input_shape[-4], input_shape[-3], input_shape[-2], 1]
+    return input_shape, input_shape_1
+
+
 def complex_add(x, y):
     tt_lib.tensor.complex_add(
         x, y, tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.DRAM)
@@ -151,15 +161,15 @@ def unary_pow_bw(x, y):
 
 
 def clamp_bw(x, y):
-    ttnn.clamp_bw(x, y, 0.1, 0.9)
+    ttnn.clamp_bw(x, y, min=0.1, max=0.9)
 
 
 def clamp_min_bw(x, y):
-    ttnn.clamp_min_bw(x, y, 0.1)
+    ttnn.clamp_bw(x, y, min=0.1)
 
 
 def clamp_max_bw(x, y):
-    ttnn.clamp_max_bw(x, y, 0.9)
+    ttnn.clamp_bw(x, y, max=0.9)
 
 
 def gelu_bw_none(x, y):
@@ -207,7 +217,7 @@ def unary_eq_bw(x, y):
 
 
 def logiteps_bw(x, y):
-    ttnn.logiteps_bw(x, y, 0.0001)
+    ttnn.logiteps_bw(x, y, eps=0.0001)
 
 
 def fmod_bw(x, y):
@@ -418,23 +428,23 @@ def angle_bw(x, y):
 
 
 def celu_bw(x, y):
-    ttnn.celu_bw(x, y, 1)
+    ttnn.celu_bw(x, y, alpha=1)
 
 
 def hardshrink_bw(x, y):
-    ttnn.hardshrink_bw(x, y, 0.5)
+    ttnn.hardshrink_bw(x, y, lambd=0.5)
 
 
 def leaky_relu_bw(x, y):
-    ttnn.leaky_relu_bw(x, y, 0.3)
+    ttnn.leaky_relu_bw(x, y, negative_slope=0.3)
 
 
 def softshrink_bw(x, y):
-    ttnn.softshrink_bw(x, y, 0.5)
+    ttnn.softshrink_bw(x, y, lambd=0.5)
 
 
 def unary_div_bw(x, y):
-    ttnn.div_bw(x, y, 3, round_mode="None")
+    ttnn.div_bw(x, y, 3.0, round_mode="None")
 
 
 all_binary_ops = [
@@ -450,6 +460,16 @@ def unary_div_bw(x, y):
         "op": ttnn.mul,
         "name": "ttnn.mul",
     },
+    {
+        "op": ttnn.mul,
+        "name": "ttnn.mul_bcast_h",
+        "shape_func": bcast_h_shape_func_1,
+    },
+    {
+        "op": ttnn.mul,
+        "name": "ttnn.mul_bcast_w",
+        "shape_func": bcast_w_shape_func_1,
+    },
     {
         "op": ttnn.mul,
         "name": "ttnn.mul_bcast_hw",
@@ -646,7 +666,7 @@ def unary_div_bw(x, y):
     },
     {
         "op": ttnn.embedding,
-        "name": "tt_lib.tensor.embeddings",
+        "name": "ttnn.embedding",
         "layout": "ROW_MAJOR",
         "shape_func": embeddings_shape_func,
     },
@@ -1170,11 +1190,11 @@ def leaky_relu(x):
 
 
 def softshrink(x):
-    ttnn.softshrink(x, 70)
+    ttnn.softshrink(x, lambd=70)
 
 
 def hardshrink(x):
-    ttnn.hardshrink(x, 1)
+    ttnn.hardshrink(x, lambd=1)
 
 
 def elu(x):
@@ -1194,7 +1214,7 @@ def bias_gelu_unary(x):
 
 
 def logit(x):
-    ttnn.logit(x, 0.0001)
+    ttnn.logit(x, eps=0.0001)
 
 
 def logical_andi(x):
@@ -1309,14 +1329,6 @@ def empty(x):
     ttnn.empty(shape=x.get_legacy_shape(), dtype=x.get_dtype(), layout=x.get_layout(), device=x.device())
 
 
-def tril(x):
-    ttnn.tril(x, 1)
-
-
-def triu(x):
-    ttnn.triu(x, 1)
-
-
 def sum_dim_2(x):
     ttnn.sum(x, dim=2)
 
@@ -1951,6 +1963,7 @@ def clone(x):
     {
         "op": tilize,
         "name": "ttnn.tilize",
+        "layout": "ROW_MAJOR",
     },
     {
         "op": tt_lib.tensor.untilize,
@@ -1968,6 +1981,7 @@ def clone(x):
     {
         "op": ttnn.tilize_with_zero_padding,
         "name": "ttnn.tilize_with_zero_padding",
+        "layout": "ROW_MAJOR",
     },
     {
         "op": pad,
@@ -2022,12 +2036,12 @@ def clone(x):
         "name": "ttnn.empty",
     },
     {
-        "op": tril,
+        "op": ttnn.tril,
         "name": "ttnn.tril",
         "num_repeats": 3,
    },
     {
-        "op": triu,
+        "op": ttnn.triu,
         "name": "ttnn.triu",
         "num_repeats": 3,
     },
@@ -2181,16 +2195,16 @@ def clone(x):
         "name": "tt_lib.tensor.mean_hw",
     },
     {
-        "op": tt_lib.tensor.var_hw,
-        "name": "tt_lib.tensor.var_hw",
+        "op": ttnn.var_hw,
+        "name": "ttnn.var_hw",
     },
     {
         "op": logical_noti,
         "name": "tt_lib.tensor.logical_noti",
     },
     {
-        "op": tt_lib.tensor.std_hw,
-        "name": "tt_lib.tensor.std_hw",
+        "op": ttnn.std_hw,
+        "name": "ttnn.std_hw",
     },
     {
         "op": ttnn.normalize_hw,
@@ -2534,18 +2548,23 @@ def div_bw(x, y, z):
     ttnn.div_bw(x, y, z, round_mode="None")
 
 
+def add_bw(x, y, z):
+    ttnn.add_bw(x, y, z)
+
+
 def primary_moreh_norm_backward(x, y, z):
     tt_lib.operations.primary.moreh_norm_backward(x, y, z, p=2.0)
 
 
-def fused_linear(x, weight, bias):
+def linear(x, weight, bias):
     ttnn.linear(x, weight, bias=bias)
 
 
-def fused_linear_shape_func(input_shape):
-    x_shape = [1, 1, input_shape[-2], input_shape[-1]]
-    weight_shape = [1, 1, input_shape[-2], input_shape[-1]]
-    bias_shape = [1, 1, 32, input_shape[-1]]
+def linear_shape_func(input_shape):
+    N = input_shape[-1]
+    x_shape = [1, input_shape[-2], N]
+    weight_shape = [N, N]
+    bias_shape = [1, N]
     return x_shape, weight_shape, bias_shape
 
 
@@ -2634,7 +2653,7 @@ def fused_linear_shape_func(input_shape):
         "name": "ttnn.min_bw",
     },
     {
-        "op": ttnn.add_bw,
+        "op": add_bw,
         "name": "ttnn.add_bw",
     },
     # {
@@ -2726,9 +2745,9 @@ def fused_linear_shape_func(input_shape):
         "name": "tt_lib.tensor.moreh_norm_backward",
     },
     {
-        "op": fused_linear,
+        "op": linear,
         "name": "ttnn.linear",
-        "shape_func": fused_linear_shape_func,
+        "shape_func": linear_shape_func,
     },
     {
         "op": ttnn.ge_bw,
diff --git a/tests/ttnn/profiling/reference.txt b/tests/ttnn/profiling/reference.txt
index e7ee640ab5f..94924e2a625 100644
--- a/tests/ttnn/profiling/reference.txt
+++ b/tests/ttnn/profiling/reference.txt
@@ -1,5 +1,5 @@
 op,count,python min dispatch time (ms),python mean dispatch time(ms),python mean dispatch + sync time (ms),C++ mean dispatch time (ms)
-tt_lib.fused_ops.softmax.softmax,200,0.129,0.138,0.383,0.077
+tt_lib.fused_ops.softmax.softmax,200,0.179,0.192,0.372,0.103
 tt_lib.operations.primary.moreh_logsoftmax_backward_dim_0,200,0.029,0.031,0.265,0.013
 tt_lib.operations.primary.moreh_logsoftmax_backward_dim_1,200,0.026,0.028,0.295,0.011
 tt_lib.operations.primary.moreh_logsoftmax_backward_dim_2,200,0.026,0.028,0.342,0.011
@@ -40,86 +40,86 @@ tt_lib.tensor.argmax_all,80,80.324,80.365,80.562,1.332
 tt_lib.tensor.argmax_dim_1,80,82.097,82.944,83.752,0.89
 tt_lib.tensor.argmax_dim_2,80,178.316,178.317,178.87,0.673
 tt_lib.tensor.argmax_dim_3,80,79.938,80.094,80.491,0.738
-tt_lib.tensor.argmin_all,80,80.354,80.418,80.579,1.34
-tt_lib.tensor.argmin_dim_1,80,78.805,78.958,79.711,0.828
+tt_lib.tensor.argmin_all,80,91.213,91.972,92.18,1.415
+tt_lib.tensor.argmin_dim_1,80,91.459,91.551,92.49,0.841
 tt_lib.tensor.argmin_dim_2,80,177.736,177.917,177.946,0.684
 tt_lib.tensor.argmin_dim_3,80,89.948,90.192,90.39,0.755
 tt_lib.tensor.assign_binary,200,0.013,0.014,0.1,0.008
 tt_lib.tensor.assign_unary,200,0.013,0.013,0.1,0.008
 tt_lib.tensor.bcast_add_h,200,0.062,0.065,0.216,0.047
 tt_lib.tensor.bcast_add_hw,200,0.058,0.065,0.205,0.045
-tt_lib.tensor.bcast_add_w,200,0.062,0.065,0.19,0.047
-tt_lib.tensor.bcast_mul_h,200,0.061,0.065,0.188,0.049
+tt_lib.tensor.bcast_add_w,200,0.072,0.074,0.189,0.055
+tt_lib.tensor.bcast_mul_h,200,0.07,0.074,0.19,0.054
 tt_lib.tensor.bcast_mul_hw,200,0.058,0.062,0.204,0.045
 tt_lib.tensor.bcast_mul_w,200,0.061,0.065,0.19,0.047
 tt_lib.tensor.bcast_sub_h,200,0.061,0.064,0.187,0.047
 tt_lib.tensor.bcast_sub_hw,200,0.062,0.066,0.205,0.045
 tt_lib.tensor.bcast_sub_w,200,0.061,0.065,0.19,0.047
-tt_lib.tensor.bias_gelu_unary,200,0.085,0.088,0.214,0.056
+tt_lib.tensor.bias_gelu_unary,200,0.099,0.102,0.214,0.063
 tt_lib.tensor.complex_abs,200,0.136,0.153,0.352,0.065
 tt_lib.tensor.complex_add,200,0.036,0.04,0.148,0.014
-tt_lib.tensor.complex_div,200,0.552,0.596,1.602,0.268
+tt_lib.tensor.complex_div,200,0.628,0.645,1.591,0.283
 tt_lib.tensor.complex_mul,200,0.263,0.272,0.783,0.125
 tt_lib.tensor.complex_recip,200,0.294,0.306,0.814,0.138
 tt_lib.tensor.complex_sub,200,0.034,0.042,0.148,0.015
-tt_lib.tensor.conj,200,0.085,0.089,0.265,0.039
+tt_lib.tensor.conj,200,0.103,0.109,0.255,0.044
 tt_lib.tensor.conj_bw,200,0.097,0.103,0.265,0.043
 tt_lib.tensor.copy,200,0.021,0.022,0.101,0.008
 tt_lib.tensor.fill_ones_rm,200,0.017,0.02,2.028,0.007
 tt_lib.tensor.fill_rm,200,0.018,0.018,2.028,0.006
-tt_lib.tensor.geglu_dim_2,200,0.091,0.094,0.245,0.044
-tt_lib.tensor.geglu_dim_3,200,0.091,0.093,0.245,0.042
-tt_lib.tensor.global_max,200,0.646,0.678,0.686,0.431
+ttnn.geglu_dim_2,200,0.105,0.111,0.236,0.045
+ttnn.geglu_dim_3,200,0.105,0.111,0.236,0.045
+tt_lib.tensor.global_max,200,0.798,0.874,0.85,0.506
 tt_lib.tensor.global_mean,200,0.735,0.828,0.8,0.471
 tt_lib.tensor.global_min,200,0.908,0.919,0.904,0.519
 tt_lib.tensor.global_sum,200,0.651,0.704,0.69,0.434
-tt_lib.tensor.glu_dim_2,200,0.09,0.096,0.266,0.043
-tt_lib.tensor.glu_dim_3,200,0.09,0.093,0.266,0.042
+ttnn.glu_dim_2,200,0.09,0.096,0.266,0.043
+ttnn.glu_dim_3,200,0.12,0.128,0.266,0.054
 tt_lib.tensor.imag,200,0.025,0.027,0.058,0.011
 tt_lib.tensor.imag_bw,200,0.133,0.137,0.274,0.076
 tt_lib.tensor.lamb_optimizer,80,3.468,3.474,6.687,2.054
 tt_lib.tensor.logical_andi,200,0.022,0.023,0.104,0.009
-tt_lib.tensor.logical_noti,200,0.152,0.156,0.322,0.101
+tt_lib.tensor.logical_noti,200,0.173,0.179,0.32,0.116
 tt_lib.tensor.logical_ori,200,0.134,0.138,0.224,0.091
 tt_lib.tensor.logical_xori,200,0.024,0.025,0.102,0.01
-tt_lib.tensor.mac,200,0.058,0.06,0.306,0.024
+tt_lib.tensor.mac,200,0.066,0.068,0.279,0.027
 tt_lib.tensor.maeloss,200,0.682,0.716,0.724,0.445
 tt_lib.tensor.mean_hw,200,0.027,0.029,0.08,0.012
 tt_lib.tensor.moreh_norm_backward,200,0.036,0.038,0.667,0.017
-tt_lib.tensor.mseloss,200,0.673,0.687,0.698,0.434
+tt_lib.tensor.mseloss,200,0.789,0.911,0.992,0.55
 tt_lib.tensor.normalize_global,200,0.256,0.262,56.404,0.154
-tt_lib.tensor.normalize_hw,200,0.226,0.242,0.67,0.145
+ttnn.normalize_hw,200,0.226,0.242,0.67,0.145
 tt_lib.tensor.polar_binary,200,0.118,0.121,0.751,0.052
 tt_lib.tensor.pow_float,200,0.329,0.342,1.268,0.186
 tt_lib.tensor.pow_int,200,0.025,0.028,0.102,0.01
 tt_lib.tensor.real,200,0.027,0.029,0.06,0.012
 tt_lib.tensor.real_bw,200,0.124,0.127,0.275,0.075
-tt_lib.tensor.reglu_dim_2,200,0.089,0.095,0.246,0.044
-tt_lib.tensor.reglu_dim_3,200,0.091,0.092,0.245,0.043
+ttnn.reglu_dim_2,200,0.102,0.107,0.245,0.045
+ttnn.reglu_dim_3,200,0.105,0.111,0.244,0.045
 tt_lib.tensor.repeat,200,0.025,0.027,0.368,0.009
-tt_lib.tensor.repeat_interleave_dim_0,200,0.032,0.034,0.377,0.01
-tt_lib.tensor.repeat_interleave_dim_1,80,0.397,0.399,322.452,0.215
-tt_lib.tensor.repeat_interleave_dim_2,80,0.152,0.154,150.628,0.076
+ttnn.repeat_interleave_dim_0,200,0.039,0.043,0.375,0.01
+ttnn.repeat_interleave_dim_1,80,0.42,0.429,323.298,0.219
+ttnn.repeat_interleave_dim_2,80,0.152,0.154,150.628,0.076
 tt_lib.tensor.rpow,200,0.09,0.094,0.243,0.058
 tt_lib.tensor.split_last_dim_two_chunks_tiled,200,0.017,0.017,0.098,0.009
-tt_lib.tensor.std_hw,200,0.139,0.148,0.463,0.084
+ttnn.std_hw,200,0.139,0.148,0.463,0.084
 tt_lib.tensor.subalpha,200,0.12,0.135,0.35,0.077
 tt_lib.tensor.sum_dim_0,80,0.44,0.46,1051.11,0.292
 tt_lib.tensor.sum_dim_1,200,0.164,0.17,12.75,0.113
-tt_lib.tensor.swiglu_dim_2,200,0.094,0.102,0.265,0.044
-tt_lib.tensor.swiglu_dim_3,200,0.09,0.098,0.266,0.043
+ttnn.swiglu_dim_2,200,0.094,0.102,0.265,0.044
+ttnn.swiglu_dim_3,200,0.111,0.114,0.256,0.047
 tt_lib.tensor.transpose,200,0.07,0.072,0.133,0.057
 tt_lib.tensor.typecast,200,0.021,0.021,0.08,0.009
 tt_lib.tensor.untilize,200,0.018,0.019,0.112,0.007
 tt_lib.tensor.untilize_with_unpadding,200,0.026,0.027,3.608,0.007
-tt_lib.tensor.var_hw,200,0.121,0.125,0.456,0.076
+ttnn.var_hw,200,0.14,0.15,0.455,0.094
 ttnn.abs,200,0.024,0.025,0.101,0.009
 ttnn.abs_bw,200,0.062,0.075,0.241,0.023
 ttnn.acos,200,0.023,0.024,0.104,0.009
 ttnn.acos_bw,200,2.753,2.757,5.251,0.439
 ttnn.acosh,200,1.415,1.568,2.677,0.311
 ttnn.acosh_bw,200,4.95,4.992,6.302,0.372
-ttnn.add,200,0.031,0.034,0.143,0.013
+ttnn.add,200,0.037,0.039,0.142,0.014
 ttnn.add_bw,200,0.071,0.074,0.114,0.05
 ttnn.add_unary,200,0.074,0.077,0.118,0.053
 ttnn.addalpha_bw,200,0.098,0.106,0.119,0.05
@@ -141,37 +141,37 @@ ttnn.bias_gelu,200,0.034,0.041,0.146,0.019
 ttnn.bias_gelu_bw_none,200,0.503,0.509,1.626,0.31
 ttnn.bias_gelu_bw_tanh,200,0.926,0.985,2.54,0.607
 ttnn.bias_gelu_unary_bw_none,200,0.538,0.564,1.602,0.352
-ttnn.bias_gelu_unary_bw_tanh,200,0.951,0.982,2.513,0.625
+ttnn.bias_gelu_unary_bw_tanh,200,1.113,1.152,2.504,0.712
 ttnn.cbrt,200,0.177,0.196,0.683,0.1
 ttnn.ceil_bw,200,0.836,0.844,0.858,
 ttnn.celu_bw,200,1.988,2.032,2.46,0.128
 ttnn.clamp_bw,200,0.149,0.161,0.686,0.062
 ttnn.clamp_max_bw,200,0.092,0.096,0.341,0.031
 ttnn.clamp_min_bw,200,0.092,0.109,0.375,0.035
-ttnn.clip,200,0.583,0.611,1.968,0.326
+ttnn.clip,200,2.088,2.12,2.686,0.142
 ttnn.clone,200,0.025,0.027,0.101,0.009
 ttnn.concat_bw_dim_0,200,0.07,0.074,0.203,0.022
 ttnn.concat_bw_dim_1,200,0.071,0.075,0.204,0.022
 ttnn.concat_bw_dim_2,200,0.072,0.077,0.134,0.023
 ttnn.concat_bw_dim_3,200,0.072,0.075,0.206,0.022
 ttnn.concat_dim_0,200,0.034,0.038,0.198,0.01
-ttnn.concat_dim_1,200,0.03,0.033,0.199,0.009
-ttnn.concat_dim_2,200,0.031,0.034,0.198,0.01
+ttnn.concat_dim_1,200,0.034,0.036,0.198,0.011
+ttnn.concat_dim_2,200,0.034,0.034,0.198,0.01
 ttnn.concat_dim_3,200,0.037,0.039,0.191,0.011
 ttnn.cos,200,0.022,0.023,0.14,0.009
 ttnn.cos_bw,200,0.091,0.103,0.389,0.035
 ttnn.cosh,200,1.044,1.056,1.27,0.056
-ttnn.cosh_bw,120,3.021,3.029,5.423,0.738
-ttnn.deg2rad,200,0.073,0.076,0.115,0.049
+ttnn.cosh_bw,120,4.006,4.016,5.933,0.418
+ttnn.deg2rad,200,0.084,0.087,0.14,0.054
 ttnn.deg2rad_bw,200,0.078,0.081,0.115,0.047
 ttnn.digamma,200,1.019,1.023,3.225,0.597
 ttnn.digamma_bw,80,2.303,2.328,7.672,1.258
 ttnn.div_bw,120,3.578,3.584,6.382,0.478
 ttnn.div_no_nan_bw,200,1.954,1.982,2.174,0.13
-ttnn.divide,200,0.03,0.032,0.145,0.013
+ttnn.divide,200,0.035,0.039,0.146,0.014
 ttnn.elu,200,0.024,0.025,0.149,0.009
 ttnn.elu_bw,200,0.291,0.327,1.091,0.153
-ttnn.embedding,200,0.025,0.027,0.077,0.008
+ttnn.embedding,200,0.032,0.033,0.074,0.01
 ttnn.empty,200,0.822,0.825,0.846,
 ttnn.eq,200,0.031,0.033,0.143,0.013
 ttnn.eq_bw,200,0.143,0.149,0.226,0.097
@@ -185,9 +185,9 @@ ttnn.erfc_fast_and_approx_True,200,0.023,0.026,0.266,0.01
 ttnn.erfinv,200,0.023,0.025,0.692,0.009
 ttnn.erfinv_bw,200,3.914,3.92,5.93,0.454
 ttnn.exp,200,0.023,0.023,0.13,0.009
-ttnn.exp2,200,0.022,0.023,0.103,0.009
+ttnn.exp2,200,0.025,0.027,0.101,0.01
 ttnn.exp2_bw,200,0.125,0.126,0.352,0.068
-ttnn.exp_bw,200,0.74,0.761,2.956,0.373
+ttnn.exp_bw,200,0.886,0.903,2.944,0.429
 ttnn.expm1,200,0.026,0.028,0.106,0.01
 ttnn.expm1_bw,200,0.128,0.129,0.35,0.069
 ttnn.fill_bw,200,0.801,0.828,0.893,0.524
@@ -226,7 +226,7 @@ ttnn.isinf,200,0.023,0.023,0.102,0.009
 ttnn.isnan,200,0.023,0.024,0.102,0.009
 ttnn.isneginf,200,0.023,0.024,0.102,0.009
 ttnn.isposinf,200,0.023,0.023,0.101,0.009
-ttnn.ldexp,200,0.03,0.038,0.144,0.013
+ttnn.ldexp,200,0.035,0.036,0.142,0.014
 ttnn.ldexp_bw,200,0.236,0.242,0.627,0.138
 ttnn.le,200,0.034,0.035,0.145,0.014
 ttnn.le_bw,200,1.714,1.746,1.79,
@@ -239,18 +239,18 @@ ttnn.lerp_bw_tensor_weight,200,0.913,0.951,1.041,0.042
 ttnn.lez,200,0.022,0.023,0.102,0.01
 ttnn.lgamma,120,3.858,4.337,6.365,1.264
 ttnn.lgamma_bw,200,1.134,1.146,3.364,0.65
-ttnn.linear,200,0.081,0.088,0.854,0.061
+ttnn.linear,200,0.111,0.12,0.851,0.073
 ttnn.log,200,0.022,0.024,0.102,0.009
 ttnn.log10,200,0.022,0.023,0.103,0.009
 ttnn.log10_bw,200,0.715,0.723,2.633,0.396
 ttnn.log1p,200,0.907,0.914,0.953,0.028
 ttnn.log1p_bw,200,1.659,1.776,3.271,0.417
-ttnn.log2,200,0.022,0.023,0.102,0.009
+ttnn.log2,200,0.025,0.028,0.103,0.011
 ttnn.log2_bw,200,0.803,0.918,2.633,0.475
 ttnn.log_bw,200,2.099,2.13,3.087,0.186
 ttnn.log_sigmoid,200,0.023,0.024,0.182,0.009
 ttnn.log_sigmoid_bw,200,0.712,0.77,2.403,0.434
-ttnn.logaddexp,200,0.033,0.034,0.316,0.014
+ttnn.logaddexp,200,0.038,0.042,0.318,0.015
 ttnn.logaddexp2,200,0.034,0.035,0.201,0.014
 ttnn.logaddexp2_bw,200,0.483,0.517,1.451,0.288
 ttnn.logaddexp_bw,200,0.336,0.403,1.233,0.203
@@ -258,8 +258,8 @@ ttnn.logical_and,200,0.031,0.034,0.143,0.013
 ttnn.logical_not,200,0.023,0.026,0.101,0.009
 ttnn.logical_or,200,0.031,0.034,0.145,0.013
 ttnn.logical_xor,200,0.199,0.214,0.926,0.082
-ttnn.logit,200,1.483,1.516,4.84,0.767
-ttnn.logit_bw,200,0.898,1.005,3.314,0.493
+ttnn.logit,200,3.023,3.057,5.468,0.61
+ttnn.logit_bw,200,1.725,1.786,3.623,0.316
 ttnn.logiteps_bw,200,4.401,4.438,6.012,0.456
 ttnn.lt,200,0.033,0.035,0.142,0.015
 ttnn.lt_bw,200,1.635,1.747,1.749,
@@ -267,14 +267,17 @@ ttnn.ltz,200,0.022,0.023,0.102,0.009
 ttnn.matmul,200,0.027,0.029,3.139,0.007
 ttnn.max_bw,200,1.218,1.262,2.008,0.164
 ttnn.max_dim_2,200,0.035,0.037,0.084,0.007
-ttnn.max_dim_23,200,0.039,0.046,0.081,0.012
-ttnn.max_dim_3,200,0.03,0.032,0.059,0.007
+ttnn.max_dim_23,200,0.046,0.053,0.081,0.016
+ttnn.max_dim_3,200,0.034,0.038,0.059,0.007
 ttnn.min_bw,200,1.242,1.289,2.011,0.185
-ttnn.min_dim_2,200,0.066,0.069,0.192,0.024
+ttnn.min_dim_2,200,0.08,0.091,0.185,0.027
 ttnn.min_dim_23,200,0.072,0.075,0.184,0.027
 ttnn.min_dim_3,200,0.066,0.07,0.167,0.023
 ttnn.mish,200,0.09,0.097,0.578,0.034
 ttnn.mul,200,0.035,0.036,0.143,0.014
+ttnn.mul_bcast_h,200,0.068,0.07,0.186,0.05
+ttnn.mul_bcast_w,200,0.069,0.072,0.188,0.051
+ttnn.mul_bcast_hw,200,0.039,0.04,0.201,0.019
 ttnn.mul_bw,200,0.06,0.062,0.28,0.025
 ttnn.mul_unary,200,0.066,0.069,0.128,0.047
 ttnn.multigammaln,200,23.793,23.819,26.245,10.039
@@ -314,7 +317,7 @@ ttnn.rsqrt_fast_and_approx_False,200,0.025,0.026,0.793,0.009
 ttnn.rsqrt_fast_and_approx_True,200,0.024,0.025,0.793,0.009
 ttnn.rsub,200,0.024,0.025,0.102,0.009
 ttnn.rsub_bw,200,0.105,0.123,0.216,0.063
-ttnn.scale_mask_softmax_in_place,200,0.074,0.076,0.127,0.045
+ttnn.scale_mask_softmax_in_place,200,0.086,0.089,0.129,0.052
 ttnn.selu_bw,200,0.329,0.378,1.202,0.199
 ttnn.sigmoid,200,0.023,0.024,0.136,0.009
 ttnn.sigmoid_accurate,200,0.023,0.023,0.109,0.009
@@ -326,38 +329,38 @@ ttnn.silu_bw,200,1.101,1.117,1.541,0.121
 ttnn.sin,200,0.022,0.023,0.151,0.009
 ttnn.sin_bw,200,0.063,0.068,0.292,0.024
 ttnn.sinh,200,1.038,1.048,1.278,0.056
-ttnn.sinh_bw,200,2.77,2.835,5.21,0.53
+ttnn.sinh_bw,200,3.844,3.864,5.754,0.398
 ttnn.slice,200,0.031,0.033,0.098,0.012
 ttnn.softmax_in_place,200,0.072,0.075,0.113,0.051
 ttnn.softplus,200,0.024,0.025,0.285,0.009
 ttnn.softplus_bw,200,0.452,0.475,1.559,0.269
 ttnn.softshrink,200,0.26,0.267,0.841,0.148
 ttnn.softshrink_bw,200,2.851,2.874,2.976,0.113
-ttnn.softsign,200,0.137,0.142,0.449,0.079
+ttnn.softsign,200,0.155,0.163,0.448,0.087
 ttnn.softsign_bw,200,0.059,0.062,0.248,0.021
 ttnn.sqrt,200,0.023,0.023,0.103,0.009
-ttnn.sqrt_bw,200,0.777,0.943,2.968,0.499
+ttnn.sqrt_bw,200,0.883,0.918,2.96,0.476
 ttnn.square,200,0.023,0.024,0.102,0.009
 ttnn.square_bw,200,0.116,0.121,0.254,0.066
-ttnn.squared_difference,200,0.03,0.032,0.143,0.013
+ttnn.squared_difference,200,0.034,0.037,0.14,0.014
 ttnn.squared_difference_bw,200,0.202,0.206,0.502,0.119
 ttnn.sub,200,0.033,0.036,0.147,0.014
 ttnn.sub_bw,200,0.085,0.089,0.215,0.055
 ttnn.sub_unary,200,0.064,0.068,0.115,0.047
 ttnn.subalpha_bw,200,0.117,0.147,0.215,0.06
 ttnn.sum_dim_2,200,0.031,0.033,0.084,0.007
-ttnn.sum_dim_23,200,0.038,0.041,0.082,0.012
+ttnn.sum_dim_23,200,0.045,0.051,0.079,0.014
 ttnn.sum_dim_3,200,0.031,0.034,0.06,0.007
 ttnn.swish,200,0.029,0.03,0.137,0.009
 ttnn.tan,200,0.023,0.023,0.132,0.009
 ttnn.tan_bw,200,0.145,0.149,0.478,0.078
 ttnn.tanh,200,0.022,0.023,0.101,0.009
 ttnn.tanh_bw,200,0.087,0.092,0.437,0.039
-ttnn.tanhshrink,200,0.053,0.055,0.242,0.021
+ttnn.tanhshrink,200,0.065,0.071,0.238,0.024
 ttnn.tanhshrink_bw,200,0.078,0.089,0.34,0.033
-ttnn.tilize,200,0.012,0.012,0.015,
+ttnn.tilize,200,0.021,0.021,0.111,0.007
 ttnn.tilize_with_val_padding,200,0.027,0.028,3.303,0.007
-ttnn.tilize_with_zero_padding,200,0.011,0.011,0.015,
+ttnn.tilize_with_zero_padding,200,0.021,0.022,2.457,0.007
 ttnn.threshold,200,0.244,0.264,0.709,0.142
 ttnn.threshold_bw,200,1.088,1.109,1.434,0.119
 ttnn.tril,120,97.567,97.777,97.524,0.069
@@ -366,12 +369,12 @@ ttnn.trunc_bw,200,0.85,0.883,0.895,
 ttnn.unary_add_bw,200,0.022,0.023,0.026,
 ttnn.unary_div_bw,200,1.041,1.118,1.086,0.055
 ttnn.unary_pow_bw,200,0.526,0.536,1.919,0.281
-ttnn.where,200,0.125,0.13,0.627,0.056
+ttnn.where,200,0.142,0.147,0.616,0.055
 ttnn.where_binary_x_const_y,200,0.169,0.184,0.596,0.098
 ttnn.where_binary_x_y_const,200,0.172,0.179,0.594,0.093
 ttnn.where_bw,200,0.316,0.327,1.182,0.179
 ttnn.where_x_const_const,200,0.202,0.204,0.559,0.124
-ttnn.xlogy,200,0.407,0.427,1.457,0.207
+ttnn.xlogy,200,1.103,1.131,1.82,0.112
 ttnn.xlogy_bw,200,2.066,2.09,5.443,0.571
 ttnn.zeros,200,0.868,0.88,0.897,
 ttnn.zeros_like,200,0.833,0.845,0.865,
diff --git a/tests/ttnn/unit_tests/operations/backward/test_backward_div.py b/tests/ttnn/unit_tests/operations/backward/test_backward_div.py
index b2c2afbadee..9b61f3ecd65 100644
--- a/tests/ttnn/unit_tests/operations/backward/test_backward_div.py
+++ b/tests/ttnn/unit_tests/operations/backward/test_backward_div.py
@@ -11,7 +11,7 @@
 )
 
 
-@pytest.mark.skip(reason="this test is failing because ttnn.bias_gelu_bw doesn't have a corresponding API call")
+@pytest.mark.skip(reason="this test is failing because ttnn.div_bw doesn't have a corresponding API call")
 @pytest.mark.parametrize(
     "input_shapes",
     (
@@ -44,7 +44,7 @@ def test_bw_div_binary(input_shapes, round_mode, device):
     assert status
 
 
-@pytest.mark.skip(reason="this test is failing because ttnn.bias_gelu_bw doesn't have a corresponding API call")
+@pytest.mark.skip(reason="this test is failing because ttnn.div_bw doesn't have a corresponding API call")
 @pytest.mark.parametrize(
     "input_shapes",
     (