diff --git a/tests/tt_eager/python_api_testing/sweep_tests/op_map.py b/tests/tt_eager/python_api_testing/sweep_tests/op_map.py index 0619a89396f..aedf91b4842 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/op_map.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/op_map.py @@ -144,6 +144,10 @@ "tt_op": tt_lib_ops.eltwise_div_unary, "pytorch_op": pytorch_ops.div_unary, }, + "eltwise-unary_div": { + "tt_op": tt_lib_ops.eltwise_unary_div, + "pytorch_op": pytorch_ops.unary_div, + }, "eltwise-mul_unary": { "tt_op": tt_lib_ops.eltwise_mul_unary, "pytorch_op": pytorch_ops.mul_unary, diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_div.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_div.py index 3037ed06cdb..38ffe3c2123 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_div.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_div.py @@ -16,6 +16,7 @@ from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import ( run_single_pytorch_test, ) +from models.utility_functions import skip_for_grayskull mem_configs = [ ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), @@ -24,6 +25,7 @@ @pytest.mark.parametrize("accurate_mode", [False, True]) +@pytest.mark.parametrize("round_mode", ["None", "trunc", "floor"]) @pytest.mark.parametrize( "input_shapes", [ @@ -36,10 +38,12 @@ "dst_mem_config", mem_configs, ) +@skip_for_grayskull("#ToDo: GS implementation needs to be done for floor and trunc") class TestDiv: def test_run_div( self, accurate_mode, + round_mode, input_shapes, dst_mem_config, device, @@ -47,23 +51,24 @@ def test_run_div( if accurate_mode == False: # If input_b is non-zero tensor datagen_func = [ generation_funcs.gen_func_with_cast( - partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16 + partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16 ) ] + [ 
generation_funcs.gen_func_with_cast( - partial(generation_funcs.gen_rand, low=-100, high=-1), torch.bfloat16 + partial(generation_funcs.gen_rand, low=-1e6, high=-1), torch.bfloat16 ) ] else: datagen_func = [ generation_funcs.gen_func_with_cast( - partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16 + partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16 ) ] * 2 test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0] test_args.update( { "accurate_mode": accurate_mode, + "round_mode": round_mode, } ) test_args.update({"output_mem_config": dst_mem_config}) diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_div_unary.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_div_unary.py new file mode 100644 index 00000000000..f5d88a58796 --- /dev/null +++ b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_div_unary.py @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: © 2023-24 Tenstorrent Inc. 
+ +# SPDX-License-Identifier: Apache-2.0 + +import pytest +import torch +import random +from functools import partial +import tt_lib as ttl + + +from tests.tt_eager.python_api_testing.sweep_tests import ( + comparison_funcs, + generation_funcs, +) +from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import ( + run_single_pytorch_test, +) +from models.utility_functions import skip_for_grayskull + +mem_configs = [ + ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), + ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), +] + + +@pytest.mark.parametrize("accurate_mode", [True]) +@pytest.mark.parametrize("round_mode", ["None", "trunc", "floor"]) +@pytest.mark.parametrize( + "input_shapes", + [ + [[1, 1, 32, 32], [1, 1, 32, 32]], + [[1, 1, 320, 384], [1, 1, 320, 384]], + [[1, 3, 320, 384], [1, 3, 320, 384]], + ], +) +@pytest.mark.parametrize( + "scalar", + {random.uniform(-100, 100) for _ in range(3)}, +) +@pytest.mark.parametrize( + "dst_mem_config", + mem_configs, +) +@skip_for_grayskull("#ToDo: GS implementation needs to be done for floor and trunc") +class TestUnary_Div: + def test_run_unary_div( + self, + accurate_mode, + round_mode, + input_shapes, + scalar, + dst_mem_config, + device, + ): + datagen_func = [ + generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16) + ] * 2 + test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0] + test_args.update( + { + "accurate_mode": accurate_mode, + "round_mode": round_mode, + "scalar": scalar, + } + ) + test_args.update({"output_mem_config": dst_mem_config}) + comparison_func = comparison_funcs.comp_pcc + + run_single_pytorch_test( + "eltwise-unary_div", + input_shapes, + datagen_func, + comparison_func, + device, + test_args, + ) diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py 
b/tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py index 6d034c372f4..9fdffcf1981 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py @@ -713,9 +713,10 @@ def silu(x, *args, **kwargs): return torch.nn.functional.silu(x) -def div(x, y, *args, accurate_mode, **kwargs): - result = torch.div(x, y) - return result +def div(x, y, *args, accurate_mode, round_mode, **kwargs): + if round_mode == "None": + return torch.div(x, y) + return torch.div(x, y, rounding_mode=round_mode) def div_no_nan(x, y, *args, **kwargs): @@ -737,6 +738,12 @@ def div_unary(x, *args, scalar, **kwargs): return result +def unary_div(x, *args, scalar, accurate_mode, round_mode, **kwargs): + if round_mode == "None": + return torch.div(x, scalar) + return torch.div(x, scalar, rounding_mode=round_mode) + + def mul_unary(x, *args, scalar, **kwargs): result = torch.mul(x, scalar) return result diff --git a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py index 51e01df1654..02f99b23b57 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py @@ -1060,6 +1060,7 @@ def eltwise_div( y, *args, accurate_mode, + round_mode, device, dtype, layout, @@ -1069,7 +1070,7 @@ def eltwise_div( ): t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0]) t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1]) - t2 = ttl.tensor.div(t0, t1, accurate_mode, output_mem_config=output_mem_config) + t2 = ttl.tensor.div(t0, t1, accurate_mode, round_mode, output_mem_config=output_mem_config) return tt2torch_tensor(t2) @@ -1697,6 +1698,26 @@ def eltwise_div_unary( return tt2torch_tensor(t1) +@setup_host_and_device +def eltwise_unary_div( + x, + *args, + scalar, + accurate_mode, + round_mode, + device, + dtype, + layout, + input_mem_config, + 
output_mem_config, + **kwargs, +): + t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0]) + t1 = ttl.tensor.div(t0, scalar, accurate_mode, round_mode, output_mem_config=output_mem_config) + + return tt2torch_tensor(t1) + + @setup_host_and_device def eltwise_mul_unary( x, diff --git a/tt_eager/tt_dnn/op_library/composite/composite_ops.cpp b/tt_eager/tt_dnn/op_library/composite/composite_ops.cpp index 0216818083b..33c979c1247 100644 --- a/tt_eager/tt_dnn/op_library/composite/composite_ops.cpp +++ b/tt_eager/tt_dnn/op_library/composite/composite_ops.cpp @@ -885,11 +885,21 @@ Tensor addcdiv( return operation::decorate_as_composite(__func__, _addcdiv)(input_a, input_b, input_c, value, output_mem_config); } -Tensor _div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, const MemoryConfig& output_mem_config) { +Tensor _div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, string round_mode, const MemoryConfig& output_mem_config) { + TT_FATAL((round_mode == "None" || round_mode == "trunc" || round_mode == "floor") && "Incorrect rounding mode (expected 'None', 'trunc', or 'floor')"); Tensor result = ttnn::divide(input_a, input_b); + + if(round_mode == "trunc"){ + result = trunc(result); + } + else if(round_mode == "floor"){ + result = floor(result); + } + if (accurate_mode == false) { // If input_b is non-zero tensor return result; } + Tensor t_inf = full_like(input_a, std::numeric_limits<float>::infinity(), output_mem_config); Tensor t_nan = full_like(input_a, std::nanf(""), output_mem_config); return where( @@ -902,8 +912,25 @@ Tensor _div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, co result, output_mem_config); } -Tensor div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, const MemoryConfig& output_mem_config) { - return operation::decorate_as_composite(__func__, _div)(input_a, input_b, accurate_mode, output_mem_config); +Tensor div(const Tensor& input_a, const Tensor& input_b, bool 
accurate_mode, string round_mode, const MemoryConfig& output_mem_config) { + return operation::decorate_as_composite(__func__, _div)(input_a, input_b, accurate_mode, round_mode, output_mem_config); +} + +Tensor _div_overload(const Tensor& input_a, float scalar, bool accurate_mode, string round_mode, const MemoryConfig& output_mem_config) { + TT_FATAL((round_mode == "None" || round_mode == "trunc" || round_mode == "floor") && "Incorrect rounding mode (expected 'None', 'trunc', or 'floor')"); + Tensor result = div_unary(input_a, scalar); + + if(round_mode == "trunc"){ + result = trunc(result); + } + else if(round_mode == "floor"){ + result = floor(result); + } + + return result; +} +Tensor div(const Tensor& input_a, float scalar, bool accurate_mode, string round_mode, const MemoryConfig& output_mem_config) { + return operation::decorate_as_composite(__func__, _div_overload)(input_a, scalar, accurate_mode, round_mode, output_mem_config); } Tensor _trunc(const Tensor& input, const MemoryConfig& output_mem_config) { diff --git a/tt_eager/tt_dnn/op_library/composite/composite_ops.hpp b/tt_eager/tt_dnn/op_library/composite/composite_ops.hpp index 45923d76bc2..cb33321d2ef 100644 --- a/tt_eager/tt_dnn/op_library/composite/composite_ops.hpp +++ b/tt_eager/tt_dnn/op_library/composite/composite_ops.hpp @@ -183,6 +183,14 @@ Tensor div( const Tensor& input_a, const Tensor& input_b, bool accurate_mode = false, + string round_mode = "None", + const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); + +Tensor div( + const Tensor& input_a, + float scalar, + bool accurate_mode = false, + string round_mode = "None", const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); Tensor div_no_nan( diff --git a/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp b/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp index cc47de1c0de..e8d6ecba181 100644 --- a/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp +++ 
b/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp @@ -1109,10 +1109,11 @@ void TensorModuleCompositeOPs(py::module& m_tensor) { m_tensor.def( "div", - &div, + py::overload_cast<const Tensor&, const Tensor&, bool, string, const MemoryConfig&>(&div), py::arg("input_a").noconvert(), py::arg("input_b").noconvert(), py::arg("accurate_mode") = false, + py::arg("round_mode") = "None", py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, R"doc( Performs the element-wise division of ``input_a`` by ``input_b``. @@ -1128,6 +1129,33 @@ void TensorModuleCompositeOPs(py::module& m_tensor) { "input_a", "Numerator Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes" "input_b", "Denominator Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes" "accurate_mode", "Mode of Implementation", "bool", "default to false", "No" + "round_mode", "Mode of Rounding", "String", "default to None", "No" + "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" + )doc"); + + m_tensor.def( + "div", + py::overload_cast<const Tensor&, float, bool, string, const MemoryConfig&>(&div), + py::arg("input_a").noconvert(), + py::arg("scalar").noconvert(), + py::arg("accurate_mode") = false, + py::arg("round_mode") = "None", + py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, + R"doc( + Performs the element-wise division of tensor ``input_a`` by ``scalar`` value. + If scalar value is non-zero, then ``accurate_mode`` can be ``false``, else set ``accurate_mode`` to ``true`` + + Input tensor must have BFLOAT16 data type. + + Output tensor will have BFLOAT16 data type. + + .. 
csv-table:: + :header: "Argument", "Description", "Data type", "Valid range", "Required" + + "input_a", "Numerator Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes" + "scalar", "Denominator value", "float", "", "Yes" + "accurate_mode", "Mode of Implementation", "bool", "default to false", "No" + "round_mode", "Mode of Rounding", "String", "default to None", "No" "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" )doc");