diff --git a/tests/ttnn/profiling/ops_for_profiling.py b/tests/ttnn/profiling/ops_for_profiling.py
index aa9d454dfd2..070dfce16d9 100644
--- a/tests/ttnn/profiling/ops_for_profiling.py
+++ b/tests/ttnn/profiling/ops_for_profiling.py
@@ -82,6 +82,16 @@ def bcast_hw_shape_func_11(input_shape):
     return input_shape, input_shape_1
 
 
+def bcast_h_shape_func_1(input_shape):
+    input_shape_1 = [input_shape[-4], input_shape[-3], 1, input_shape[-1]]
+    return input_shape, input_shape_1
+
+
+def bcast_w_shape_func_1(input_shape):
+    input_shape_1 = [input_shape[-4], input_shape[-3], input_shape[-2], 1]
+    return input_shape, input_shape_1
+
+
 def complex_add(x, y):
     tt_lib.tensor.complex_add(
         x, y, tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.DRAM)
@@ -151,15 +161,15 @@ def unary_pow_bw(x, y):
 
 
 def clamp_bw(x, y):
-    ttnn.clamp_bw(x, y, 0.1, 0.9)
+    ttnn.clamp_bw(x, y, min=0.1, max=0.9)
 
 
 def clamp_min_bw(x, y):
-    ttnn.clamp_min_bw(x, y, 0.1)
+    ttnn.clamp_bw(x, y, min=0.1)
 
 
 def clamp_max_bw(x, y):
-    ttnn.clamp_max_bw(x, y, 0.9)
+    ttnn.clamp_bw(x, y, max=0.9)
 
 
 def gelu_bw_none(x, y):
@@ -207,7 +217,7 @@ def unary_eq_bw(x, y):
 
 
 def logiteps_bw(x, y):
-    ttnn.logiteps_bw(x, y, 0.0001)
+    ttnn.logiteps_bw(x, y, eps=0.0001)
 
 
 def fmod_bw(x, y):
@@ -418,23 +428,23 @@ def angle_bw(x, y):
 
 
 def celu_bw(x, y):
-    ttnn.celu_bw(x, y, 1)
+    ttnn.celu_bw(x, y, alpha=1)
 
 
 def hardshrink_bw(x, y):
-    ttnn.hardshrink_bw(x, y, 0.5)
+    ttnn.hardshrink_bw(x, y, lambd=0.5)
 
 
 def leaky_relu_bw(x, y):
-    ttnn.leaky_relu_bw(x, y, 0.3)
+    ttnn.leaky_relu_bw(x, y, negative_slope=0.3)
 
 
 def softshrink_bw(x, y):
-    ttnn.softshrink_bw(x, y, 0.5)
+    ttnn.softshrink_bw(x, y, lambd=0.5)
 
 
 def unary_div_bw(x, y):
-    ttnn.div_bw(x, y, 3, round_mode="None")
+    ttnn.div_bw(x, y, 3.0, round_mode="None")
 
 
 all_binary_ops = [
@@ -450,6 +460,16 @@ def unary_div_bw(x, y):
         "op": ttnn.mul,
         "name": "ttnn.mul",
     },
+    {
+        "op": ttnn.mul,
+        "name": "ttnn.mul_bcast_h",
+        "shape_func": bcast_h_shape_func_1,
+    },
+    {
+        "op": ttnn.mul,
+        "name": "ttnn.mul_bcast_w",
+        "shape_func": bcast_w_shape_func_1,
+    },
     {
         "op": ttnn.mul,
         "name": "ttnn.mul_bcast_hw",
@@ -646,7 +666,7 @@ def unary_div_bw(x, y):
     },
     {
         "op": ttnn.embedding,
-        "name": "tt_lib.tensor.embeddings",
+        "name": "ttnn.embedding",
         "layout": "ROW_MAJOR",
         "shape_func": embeddings_shape_func,
     },
@@ -1170,11 +1190,11 @@ def leaky_relu(x):
 
 
 def softshrink(x):
-    ttnn.softshrink(x, 70)
+    ttnn.softshrink(x, lambd=70)
 
 
 def hardshrink(x):
-    ttnn.hardshrink(x, 1)
+    ttnn.hardshrink(x, lambd=1)
 
 
 def elu(x):
@@ -1194,7 +1214,7 @@ def bias_gelu_unary(x):
 
 
 def logit(x):
-    ttnn.logit(x, 0.0001)
+    ttnn.logit(x, eps=0.0001)
 
 
 def logical_andi(x):
@@ -1309,14 +1329,6 @@ def empty(x):
     ttnn.empty(shape=x.get_legacy_shape(), dtype=x.get_dtype(), layout=x.get_layout(), device=x.device())
 
 
-def tril(x):
-    ttnn.tril(x, 1)
-
-
-def triu(x):
-    ttnn.triu(x, 1)
-
-
 def sum_dim_2(x):
     ttnn.sum(x, dim=2)
 
@@ -1951,6 +1963,7 @@ def clone(x):
     {
         "op": tilize,
         "name": "ttnn.tilize",
+        "layout": "ROW_MAJOR",
     },
     {
         "op": tt_lib.tensor.untilize,
@@ -1968,6 +1981,7 @@ def clone(x):
     {
         "op": ttnn.tilize_with_zero_padding,
         "name": "ttnn.tilize_with_zero_padding",
+        "layout": "ROW_MAJOR",
     },
     {
         "op": pad,
@@ -2022,12 +2036,12 @@ def clone(x):
         "name": "ttnn.empty",
     },
     {
-        "op": tril,
+        "op": ttnn.tril,
         "name": "ttnn.tril",
         "num_repeats": 3,
    },
     {
-        "op": triu,
+        "op": ttnn.triu,
         "name": "ttnn.triu",
         "num_repeats": 3,
     },
@@ -2181,16 +2195,16 @@ def clone(x):
         "name": "tt_lib.tensor.mean_hw",
     },
     {
-        "op": tt_lib.tensor.var_hw,
-        "name": "tt_lib.tensor.var_hw",
+        "op": ttnn.var_hw,
+        "name": "ttnn.var_hw",
     },
     {
         "op": logical_noti,
         "name": "tt_lib.tensor.logical_noti",
     },
     {
-        "op": tt_lib.tensor.std_hw,
-        "name": "tt_lib.tensor.std_hw",
+        "op": ttnn.std_hw,
+        "name": "ttnn.std_hw",
     },
     {
         "op": ttnn.normalize_hw,
@@ -2534,18 +2548,23 @@ def div_bw(x, y, z):
     ttnn.div_bw(x, y, z, round_mode="None")
 
 
+def add_bw(x, y, z):
+    ttnn.add_bw(x, y, z)
+
+
 def primary_moreh_norm_backward(x, y, z):
     tt_lib.operations.primary.moreh_norm_backward(x, y, z, p=2.0)
 
 
-def fused_linear(x, weight, bias):
+def linear(x, weight, bias):
     ttnn.linear(x, weight, bias=bias)
 
 
-def fused_linear_shape_func(input_shape):
-    x_shape = [1, 1, input_shape[-2], input_shape[-1]]
-    weight_shape = [1, 1, input_shape[-2], input_shape[-1]]
-    bias_shape = [1, 1, 32, input_shape[-1]]
+def linear_shape_func(input_shape):
+    N = input_shape[-1]
+    x_shape = [1, input_shape[-2], N]
+    weight_shape = [N, N]
+    bias_shape = [1, N]
     return x_shape, weight_shape, bias_shape
 
 
@@ -2634,7 +2653,7 @@ def fused_linear_shape_func(input_shape):
         "name": "ttnn.min_bw",
     },
     {
-        "op": ttnn.add_bw,
+        "op": add_bw,
         "name": "ttnn.add_bw",
     },
     # {
@@ -2726,9 +2745,9 @@ def fused_linear_shape_func(input_shape):
         "name": "tt_lib.tensor.moreh_norm_backward",
     },
     {
-        "op": fused_linear,
+        "op": linear,
         "name": "ttnn.linear",
-        "shape_func": fused_linear_shape_func,
+        "shape_func": linear_shape_func,
     },
     {
         "op": ttnn.ge_bw,
diff --git a/tests/ttnn/profiling/reference.txt b/tests/ttnn/profiling/reference.txt
index e7ee640ab5f..94924e2a625 100644
--- a/tests/ttnn/profiling/reference.txt
+++ b/tests/ttnn/profiling/reference.txt
@@ -1,5 +1,5 @@
 op,count,python min dispatch time (ms),python mean dispatch time(ms),python mean dispatch + sync time (ms),C++ mean dispatch time (ms)
-tt_lib.fused_ops.softmax.softmax,200,0.129,0.138,0.383,0.077
+tt_lib.fused_ops.softmax.softmax,200,0.179,0.192,0.372,0.103
 tt_lib.operations.primary.moreh_logsoftmax_backward_dim_0,200,0.029,0.031,0.265,0.013
 tt_lib.operations.primary.moreh_logsoftmax_backward_dim_1,200,0.026,0.028,0.295,0.011
 tt_lib.operations.primary.moreh_logsoftmax_backward_dim_2,200,0.026,0.028,0.342,0.011
@@ -40,86 +40,86 @@ tt_lib.tensor.argmax_all,80,80.324,80.365,80.562,1.332
 tt_lib.tensor.argmax_dim_1,80,82.097,82.944,83.752,0.89
 tt_lib.tensor.argmax_dim_2,80,178.316,178.317,178.87,0.673
 tt_lib.tensor.argmax_dim_3,80,79.938,80.094,80.491,0.738
-tt_lib.tensor.argmin_all,80,80.354,80.418,80.579,1.34
-tt_lib.tensor.argmin_dim_1,80,78.805,78.958,79.711,0.828
+tt_lib.tensor.argmin_all,80,91.213,91.972,92.18,1.415
+tt_lib.tensor.argmin_dim_1,80,91.459,91.551,92.49,0.841
 tt_lib.tensor.argmin_dim_2,80,177.736,177.917,177.946,0.684
 tt_lib.tensor.argmin_dim_3,80,89.948,90.192,90.39,0.755
 tt_lib.tensor.assign_binary,200,0.013,0.014,0.1,0.008
 tt_lib.tensor.assign_unary,200,0.013,0.013,0.1,0.008
 tt_lib.tensor.bcast_add_h,200,0.062,0.065,0.216,0.047
 tt_lib.tensor.bcast_add_hw,200,0.058,0.065,0.205,0.045
-tt_lib.tensor.bcast_add_w,200,0.062,0.065,0.19,0.047
-tt_lib.tensor.bcast_mul_h,200,0.061,0.065,0.188,0.049
+tt_lib.tensor.bcast_add_w,200,0.072,0.074,0.189,0.055
+tt_lib.tensor.bcast_mul_h,200,0.07,0.074,0.19,0.054
 tt_lib.tensor.bcast_mul_hw,200,0.058,0.062,0.204,0.045
 tt_lib.tensor.bcast_mul_w,200,0.061,0.065,0.19,0.047
 tt_lib.tensor.bcast_sub_h,200,0.061,0.064,0.187,0.047
 tt_lib.tensor.bcast_sub_hw,200,0.062,0.066,0.205,0.045
 tt_lib.tensor.bcast_sub_w,200,0.061,0.065,0.19,0.047
-tt_lib.tensor.bias_gelu_unary,200,0.085,0.088,0.214,0.056
+tt_lib.tensor.bias_gelu_unary,200,0.099,0.102,0.214,0.063
 tt_lib.tensor.complex_abs,200,0.136,0.153,0.352,0.065
 tt_lib.tensor.complex_add,200,0.036,0.04,0.148,0.014
-tt_lib.tensor.complex_div,200,0.552,0.596,1.602,0.268
+tt_lib.tensor.complex_div,200,0.628,0.645,1.591,0.283
 tt_lib.tensor.complex_mul,200,0.263,0.272,0.783,0.125
 tt_lib.tensor.complex_recip,200,0.294,0.306,0.814,0.138
 tt_lib.tensor.complex_sub,200,0.034,0.042,0.148,0.015
-tt_lib.tensor.conj,200,0.085,0.089,0.265,0.039
+tt_lib.tensor.conj,200,0.103,0.109,0.255,0.044
 tt_lib.tensor.conj_bw,200,0.097,0.103,0.265,0.043
 tt_lib.tensor.copy,200,0.021,0.022,0.101,0.008
 tt_lib.tensor.fill_ones_rm,200,0.017,0.02,2.028,0.007
 tt_lib.tensor.fill_rm,200,0.018,0.018,2.028,0.006
-tt_lib.tensor.geglu_dim_2,200,0.091,0.094,0.245,0.044
-tt_lib.tensor.geglu_dim_3,200,0.091,0.093,0.245,0.042
-tt_lib.tensor.global_max,200,0.646,0.678,0.686,0.431
+ttnn.geglu_dim_2,200,0.105,0.111,0.236,0.045
+ttnn.geglu_dim_3,200,0.105,0.111,0.236,0.045
+tt_lib.tensor.global_max,200,0.798,0.874,0.85,0.506
 tt_lib.tensor.global_mean,200,0.735,0.828,0.8,0.471
 tt_lib.tensor.global_min,200,0.908,0.919,0.904,0.519
 tt_lib.tensor.global_sum,200,0.651,0.704,0.69,0.434
-tt_lib.tensor.glu_dim_2,200,0.09,0.096,0.266,0.043
-tt_lib.tensor.glu_dim_3,200,0.09,0.093,0.266,0.042
+ttnn.glu_dim_2,200,0.09,0.096,0.266,0.043
+ttnn.glu_dim_3,200,0.12,0.128,0.266,0.054
 tt_lib.tensor.imag,200,0.025,0.027,0.058,0.011
 tt_lib.tensor.imag_bw,200,0.133,0.137,0.274,0.076
 tt_lib.tensor.lamb_optimizer,80,3.468,3.474,6.687,2.054
 tt_lib.tensor.logical_andi,200,0.022,0.023,0.104,0.009
-tt_lib.tensor.logical_noti,200,0.152,0.156,0.322,0.101
+tt_lib.tensor.logical_noti,200,0.173,0.179,0.32,0.116
 tt_lib.tensor.logical_ori,200,0.134,0.138,0.224,0.091
 tt_lib.tensor.logical_xori,200,0.024,0.025,0.102,0.01
-tt_lib.tensor.mac,200,0.058,0.06,0.306,0.024
+tt_lib.tensor.mac,200,0.066,0.068,0.279,0.027
 tt_lib.tensor.maeloss,200,0.682,0.716,0.724,0.445
 tt_lib.tensor.mean_hw,200,0.027,0.029,0.08,0.012
 tt_lib.tensor.moreh_norm_backward,200,0.036,0.038,0.667,0.017
-tt_lib.tensor.mseloss,200,0.673,0.687,0.698,0.434
+tt_lib.tensor.mseloss,200,0.789,0.911,0.992,0.55
 tt_lib.tensor.normalize_global,200,0.256,0.262,56.404,0.154
-tt_lib.tensor.normalize_hw,200,0.226,0.242,0.67,0.145
+ttnn.normalize_hw,200,0.226,0.242,0.67,0.145
 tt_lib.tensor.polar_binary,200,0.118,0.121,0.751,0.052
 tt_lib.tensor.pow_float,200,0.329,0.342,1.268,0.186
 tt_lib.tensor.pow_int,200,0.025,0.028,0.102,0.01
 tt_lib.tensor.real,200,0.027,0.029,0.06,0.012
 tt_lib.tensor.real_bw,200,0.124,0.127,0.275,0.075
-tt_lib.tensor.reglu_dim_2,200,0.089,0.095,0.246,0.044
-tt_lib.tensor.reglu_dim_3,200,0.091,0.092,0.245,0.043
+ttnn.reglu_dim_2,200,0.102,0.107,0.245,0.045
+ttnn.reglu_dim_3,200,0.105,0.111,0.244,0.045
 tt_lib.tensor.repeat,200,0.025,0.027,0.368,0.009
-tt_lib.tensor.repeat_interleave_dim_0,200,0.032,0.034,0.377,0.01
-tt_lib.tensor.repeat_interleave_dim_1,80,0.397,0.399,322.452,0.215
-tt_lib.tensor.repeat_interleave_dim_2,80,0.152,0.154,150.628,0.076
+ttnn.repeat_interleave_dim_0,200,0.039,0.043,0.375,0.01
+ttnn.repeat_interleave_dim_1,80,0.42,0.429,323.298,0.219
+ttnn.repeat_interleave_dim_2,80,0.152,0.154,150.628,0.076
 tt_lib.tensor.rpow,200,0.09,0.094,0.243,0.058
 tt_lib.tensor.split_last_dim_two_chunks_tiled,200,0.017,0.017,0.098,0.009
-tt_lib.tensor.std_hw,200,0.139,0.148,0.463,0.084
+ttnn.std_hw,200,0.139,0.148,0.463,0.084
 tt_lib.tensor.subalpha,200,0.12,0.135,0.35,0.077
 tt_lib.tensor.sum_dim_0,80,0.44,0.46,1051.11,0.292
 tt_lib.tensor.sum_dim_1,200,0.164,0.17,12.75,0.113
-tt_lib.tensor.swiglu_dim_2,200,0.094,0.102,0.265,0.044
-tt_lib.tensor.swiglu_dim_3,200,0.09,0.098,0.266,0.043
+ttnn.swiglu_dim_2,200,0.094,0.102,0.265,0.044
+ttnn.swiglu_dim_3,200,0.111,0.114,0.256,0.047
 tt_lib.tensor.transpose,200,0.07,0.072,0.133,0.057
 tt_lib.tensor.typecast,200,0.021,0.021,0.08,0.009
 tt_lib.tensor.untilize,200,0.018,0.019,0.112,0.007
 tt_lib.tensor.untilize_with_unpadding,200,0.026,0.027,3.608,0.007
-tt_lib.tensor.var_hw,200,0.121,0.125,0.456,0.076
+ttnn.var_hw,200,0.14,0.15,0.455,0.094
 ttnn.abs,200,0.024,0.025,0.101,0.009
 ttnn.abs_bw,200,0.062,0.075,0.241,0.023
 ttnn.acos,200,0.023,0.024,0.104,0.009
 ttnn.acos_bw,200,2.753,2.757,5.251,0.439
 ttnn.acosh,200,1.415,1.568,2.677,0.311
 ttnn.acosh_bw,200,4.95,4.992,6.302,0.372
-ttnn.add,200,0.031,0.034,0.143,0.013
+ttnn.add,200,0.037,0.039,0.142,0.014
 ttnn.add_bw,200,0.071,0.074,0.114,0.05
 ttnn.add_unary,200,0.074,0.077,0.118,0.053
 ttnn.addalpha_bw,200,0.098,0.106,0.119,0.05
@@ -141,37 +141,37 @@ ttnn.bias_gelu,200,0.034,0.041,0.146,0.019
 ttnn.bias_gelu_bw_none,200,0.503,0.509,1.626,0.31
 ttnn.bias_gelu_bw_tanh,200,0.926,0.985,2.54,0.607
 ttnn.bias_gelu_unary_bw_none,200,0.538,0.564,1.602,0.352
-ttnn.bias_gelu_unary_bw_tanh,200,0.951,0.982,2.513,0.625
+ttnn.bias_gelu_unary_bw_tanh,200,1.113,1.152,2.504,0.712
 ttnn.cbrt,200,0.177,0.196,0.683,0.1
 ttnn.ceil_bw,200,0.836,0.844,0.858,
 ttnn.celu_bw,200,1.988,2.032,2.46,0.128
 ttnn.clamp_bw,200,0.149,0.161,0.686,0.062
 ttnn.clamp_max_bw,200,0.092,0.096,0.341,0.031
 ttnn.clamp_min_bw,200,0.092,0.109,0.375,0.035
-ttnn.clip,200,0.583,0.611,1.968,0.326
+ttnn.clip,200,2.088,2.12,2.686,0.142
 ttnn.clone,200,0.025,0.027,0.101,0.009
 ttnn.concat_bw_dim_0,200,0.07,0.074,0.203,0.022
 ttnn.concat_bw_dim_1,200,0.071,0.075,0.204,0.022
 ttnn.concat_bw_dim_2,200,0.072,0.077,0.134,0.023
 ttnn.concat_bw_dim_3,200,0.072,0.075,0.206,0.022
 ttnn.concat_dim_0,200,0.034,0.038,0.198,0.01
-ttnn.concat_dim_1,200,0.03,0.033,0.199,0.009
-ttnn.concat_dim_2,200,0.031,0.034,0.198,0.01
+ttnn.concat_dim_1,200,0.034,0.036,0.198,0.011
+ttnn.concat_dim_2,200,0.034,0.034,0.198,0.01
 ttnn.concat_dim_3,200,0.037,0.039,0.191,0.011
 ttnn.cos,200,0.022,0.023,0.14,0.009
 ttnn.cos_bw,200,0.091,0.103,0.389,0.035
 ttnn.cosh,200,1.044,1.056,1.27,0.056
-ttnn.cosh_bw,120,3.021,3.029,5.423,0.738
-ttnn.deg2rad,200,0.073,0.076,0.115,0.049
+ttnn.cosh_bw,120,4.006,4.016,5.933,0.418
+ttnn.deg2rad,200,0.084,0.087,0.14,0.054
 ttnn.deg2rad_bw,200,0.078,0.081,0.115,0.047
 ttnn.digamma,200,1.019,1.023,3.225,0.597
 ttnn.digamma_bw,80,2.303,2.328,7.672,1.258
 ttnn.div_bw,120,3.578,3.584,6.382,0.478
 ttnn.div_no_nan_bw,200,1.954,1.982,2.174,0.13
-ttnn.divide,200,0.03,0.032,0.145,0.013
+ttnn.divide,200,0.035,0.039,0.146,0.014
 ttnn.elu,200,0.024,0.025,0.149,0.009
 ttnn.elu_bw,200,0.291,0.327,1.091,0.153
-ttnn.embedding,200,0.025,0.027,0.077,0.008
+ttnn.embedding,200,0.032,0.033,0.074,0.01
 ttnn.empty,200,0.822,0.825,0.846,
 ttnn.eq,200,0.031,0.033,0.143,0.013
 ttnn.eq_bw,200,0.143,0.149,0.226,0.097
@@ -185,9 +185,9 @@ ttnn.erfc_fast_and_approx_True,200,0.023,0.026,0.266,0.01
 ttnn.erfinv,200,0.023,0.025,0.692,0.009
 ttnn.erfinv_bw,200,3.914,3.92,5.93,0.454
 ttnn.exp,200,0.023,0.023,0.13,0.009
-ttnn.exp2,200,0.022,0.023,0.103,0.009
+ttnn.exp2,200,0.025,0.027,0.101,0.01
 ttnn.exp2_bw,200,0.125,0.126,0.352,0.068
-ttnn.exp_bw,200,0.74,0.761,2.956,0.373
+ttnn.exp_bw,200,0.886,0.903,2.944,0.429
 ttnn.expm1,200,0.026,0.028,0.106,0.01
 ttnn.expm1_bw,200,0.128,0.129,0.35,0.069
 ttnn.fill_bw,200,0.801,0.828,0.893,0.524
@@ -226,7 +226,7 @@ ttnn.isinf,200,0.023,0.023,0.102,0.009
 ttnn.isnan,200,0.023,0.024,0.102,0.009
 ttnn.isneginf,200,0.023,0.024,0.102,0.009
 ttnn.isposinf,200,0.023,0.023,0.101,0.009
-ttnn.ldexp,200,0.03,0.038,0.144,0.013
+ttnn.ldexp,200,0.035,0.036,0.142,0.014
 ttnn.ldexp_bw,200,0.236,0.242,0.627,0.138
 ttnn.le,200,0.034,0.035,0.145,0.014
 ttnn.le_bw,200,1.714,1.746,1.79,
@@ -239,18 +239,18 @@ ttnn.lerp_bw_tensor_weight,200,0.913,0.951,1.041,0.042
 ttnn.lez,200,0.022,0.023,0.102,0.01
 ttnn.lgamma,120,3.858,4.337,6.365,1.264
 ttnn.lgamma_bw,200,1.134,1.146,3.364,0.65
-ttnn.linear,200,0.081,0.088,0.854,0.061
+ttnn.linear,200,0.111,0.12,0.851,0.073
 ttnn.log,200,0.022,0.024,0.102,0.009
 ttnn.log10,200,0.022,0.023,0.103,0.009
 ttnn.log10_bw,200,0.715,0.723,2.633,0.396
 ttnn.log1p,200,0.907,0.914,0.953,0.028
 ttnn.log1p_bw,200,1.659,1.776,3.271,0.417
-ttnn.log2,200,0.022,0.023,0.102,0.009
+ttnn.log2,200,0.025,0.028,0.103,0.011
 ttnn.log2_bw,200,0.803,0.918,2.633,0.475
 ttnn.log_bw,200,2.099,2.13,3.087,0.186
 ttnn.log_sigmoid,200,0.023,0.024,0.182,0.009
 ttnn.log_sigmoid_bw,200,0.712,0.77,2.403,0.434
-ttnn.logaddexp,200,0.033,0.034,0.316,0.014
+ttnn.logaddexp,200,0.038,0.042,0.318,0.015
 ttnn.logaddexp2,200,0.034,0.035,0.201,0.014
 ttnn.logaddexp2_bw,200,0.483,0.517,1.451,0.288
 ttnn.logaddexp_bw,200,0.336,0.403,1.233,0.203
@@ -258,8 +258,8 @@ ttnn.logical_and,200,0.031,0.034,0.143,0.013
 ttnn.logical_not,200,0.023,0.026,0.101,0.009
 ttnn.logical_or,200,0.031,0.034,0.145,0.013
 ttnn.logical_xor,200,0.199,0.214,0.926,0.082
-ttnn.logit,200,1.483,1.516,4.84,0.767
-ttnn.logit_bw,200,0.898,1.005,3.314,0.493
+ttnn.logit,200,3.023,3.057,5.468,0.61
+ttnn.logit_bw,200,1.725,1.786,3.623,0.316
 ttnn.logiteps_bw,200,4.401,4.438,6.012,0.456
 ttnn.lt,200,0.033,0.035,0.142,0.015
 ttnn.lt_bw,200,1.635,1.747,1.749,
@@ -267,14 +267,17 @@ ttnn.ltz,200,0.022,0.023,0.102,0.009
 ttnn.matmul,200,0.027,0.029,3.139,0.007
 ttnn.max_bw,200,1.218,1.262,2.008,0.164
 ttnn.max_dim_2,200,0.035,0.037,0.084,0.007
-ttnn.max_dim_23,200,0.039,0.046,0.081,0.012
-ttnn.max_dim_3,200,0.03,0.032,0.059,0.007
+ttnn.max_dim_23,200,0.046,0.053,0.081,0.016
+ttnn.max_dim_3,200,0.034,0.038,0.059,0.007
 ttnn.min_bw,200,1.242,1.289,2.011,0.185
-ttnn.min_dim_2,200,0.066,0.069,0.192,0.024
+ttnn.min_dim_2,200,0.08,0.091,0.185,0.027
 ttnn.min_dim_23,200,0.072,0.075,0.184,0.027
 ttnn.min_dim_3,200,0.066,0.07,0.167,0.023
 ttnn.mish,200,0.09,0.097,0.578,0.034
 ttnn.mul,200,0.035,0.036,0.143,0.014
+ttnn.mul_bcast_h,200,0.068,0.07,0.186,0.05
+ttnn.mul_bcast_w,200,0.069,0.072,0.188,0.051
+ttnn.mul_bcast_hw,200,0.039,0.04,0.201,0.019
 ttnn.mul_bw,200,0.06,0.062,0.28,0.025
 ttnn.mul_unary,200,0.066,0.069,0.128,0.047
 ttnn.multigammaln,200,23.793,23.819,26.245,10.039
@@ -314,7 +317,7 @@ ttnn.rsqrt_fast_and_approx_False,200,0.025,0.026,0.793,0.009
 ttnn.rsqrt_fast_and_approx_True,200,0.024,0.025,0.793,0.009
 ttnn.rsub,200,0.024,0.025,0.102,0.009
 ttnn.rsub_bw,200,0.105,0.123,0.216,0.063
-ttnn.scale_mask_softmax_in_place,200,0.074,0.076,0.127,0.045
+ttnn.scale_mask_softmax_in_place,200,0.086,0.089,0.129,0.052
 ttnn.selu_bw,200,0.329,0.378,1.202,0.199
 ttnn.sigmoid,200,0.023,0.024,0.136,0.009
 ttnn.sigmoid_accurate,200,0.023,0.023,0.109,0.009
@@ -326,38 +329,38 @@ ttnn.silu_bw,200,1.101,1.117,1.541,0.121
 ttnn.sin,200,0.022,0.023,0.151,0.009
 ttnn.sin_bw,200,0.063,0.068,0.292,0.024
 ttnn.sinh,200,1.038,1.048,1.278,0.056
-ttnn.sinh_bw,200,2.77,2.835,5.21,0.53
+ttnn.sinh_bw,200,3.844,3.864,5.754,0.398
 ttnn.slice,200,0.031,0.033,0.098,0.012
 ttnn.softmax_in_place,200,0.072,0.075,0.113,0.051
 ttnn.softplus,200,0.024,0.025,0.285,0.009
 ttnn.softplus_bw,200,0.452,0.475,1.559,0.269
 ttnn.softshrink,200,0.26,0.267,0.841,0.148
 ttnn.softshrink_bw,200,2.851,2.874,2.976,0.113
-ttnn.softsign,200,0.137,0.142,0.449,0.079
+ttnn.softsign,200,0.155,0.163,0.448,0.087
 ttnn.softsign_bw,200,0.059,0.062,0.248,0.021
 ttnn.sqrt,200,0.023,0.023,0.103,0.009
-ttnn.sqrt_bw,200,0.777,0.943,2.968,0.499
+ttnn.sqrt_bw,200,0.883,0.918,2.96,0.476
 ttnn.square,200,0.023,0.024,0.102,0.009
 ttnn.square_bw,200,0.116,0.121,0.254,0.066
-ttnn.squared_difference,200,0.03,0.032,0.143,0.013
+ttnn.squared_difference,200,0.034,0.037,0.14,0.014
 ttnn.squared_difference_bw,200,0.202,0.206,0.502,0.119
 ttnn.sub,200,0.033,0.036,0.147,0.014
 ttnn.sub_bw,200,0.085,0.089,0.215,0.055
 ttnn.sub_unary,200,0.064,0.068,0.115,0.047
 ttnn.subalpha_bw,200,0.117,0.147,0.215,0.06
 ttnn.sum_dim_2,200,0.031,0.033,0.084,0.007
-ttnn.sum_dim_23,200,0.038,0.041,0.082,0.012
+ttnn.sum_dim_23,200,0.045,0.051,0.079,0.014
 ttnn.sum_dim_3,200,0.031,0.034,0.06,0.007
 ttnn.swish,200,0.029,0.03,0.137,0.009
 ttnn.tan,200,0.023,0.023,0.132,0.009
 ttnn.tan_bw,200,0.145,0.149,0.478,0.078
 ttnn.tanh,200,0.022,0.023,0.101,0.009
 ttnn.tanh_bw,200,0.087,0.092,0.437,0.039
-ttnn.tanhshrink,200,0.053,0.055,0.242,0.021
+ttnn.tanhshrink,200,0.065,0.071,0.238,0.024
 ttnn.tanhshrink_bw,200,0.078,0.089,0.34,0.033
-ttnn.tilize,200,0.012,0.012,0.015,
+ttnn.tilize,200,0.021,0.021,0.111,0.007
 ttnn.tilize_with_val_padding,200,0.027,0.028,3.303,0.007
-ttnn.tilize_with_zero_padding,200,0.011,0.011,0.015,
+ttnn.tilize_with_zero_padding,200,0.021,0.022,2.457,0.007
 ttnn.threshold,200,0.244,0.264,0.709,0.142
 ttnn.threshold_bw,200,1.088,1.109,1.434,0.119
 ttnn.tril,120,97.567,97.777,97.524,0.069
@@ -366,12 +369,12 @@ ttnn.trunc_bw,200,0.85,0.883,0.895,
 ttnn.unary_add_bw,200,0.022,0.023,0.026,
 ttnn.unary_div_bw,200,1.041,1.118,1.086,0.055
 ttnn.unary_pow_bw,200,0.526,0.536,1.919,0.281
-ttnn.where,200,0.125,0.13,0.627,0.056
+ttnn.where,200,0.142,0.147,0.616,0.055
 ttnn.where_binary_x_const_y,200,0.169,0.184,0.596,0.098
 ttnn.where_binary_x_y_const,200,0.172,0.179,0.594,0.093
 ttnn.where_bw,200,0.316,0.327,1.182,0.179
 ttnn.where_x_const_const,200,0.202,0.204,0.559,0.124
-ttnn.xlogy,200,0.407,0.427,1.457,0.207
+ttnn.xlogy,200,1.103,1.131,1.82,0.112
 ttnn.xlogy_bw,200,2.066,2.09,5.443,0.571
 ttnn.zeros,200,0.868,0.88,0.897,
 ttnn.zeros_like,200,0.833,0.845,0.865,
diff --git a/tests/ttnn/unit_tests/operations/backward/test_backward_div.py b/tests/ttnn/unit_tests/operations/backward/test_backward_div.py
index b2c2afbadee..9b61f3ecd65 100644
--- a/tests/ttnn/unit_tests/operations/backward/test_backward_div.py
+++ b/tests/ttnn/unit_tests/operations/backward/test_backward_div.py
@@ -11,7 +11,7 @@
 )
 
 
-@pytest.mark.skip(reason="this test is failing because ttnn.bias_gelu_bw doesn't have a corresponding API call")
+@pytest.mark.skip(reason="this test is failing because ttnn.div_bw doesn't have a corresponding API call")
 @pytest.mark.parametrize(
     "input_shapes",
     (
@@ -44,7 +44,7 @@ def test_bw_div_binary(input_shapes, round_mode, device):
     assert status
 
 
-@pytest.mark.skip(reason="this test is failing because ttnn.bias_gelu_bw doesn't have a corresponding API call")
+@pytest.mark.skip(reason="this test is failing because ttnn.div_bw doesn't have a corresponding API call")
 @pytest.mark.parametrize(
     "input_shapes",
     (