Skip to content

Commit

Permalink
#9473: Add rounding_mode support for forward div op (#9474)
Browse files Browse the repository at this point in the history
#9473: Add rounding_mode support for forward div op
  • Loading branch information
mouliraj-mcw authored Jun 24, 2024
1 parent a7234fb commit 986637b
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 11 deletions.
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/op_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@
"tt_op": tt_lib_ops.eltwise_div_unary,
"pytorch_op": pytorch_ops.div_unary,
},
"eltwise-unary_div": {
"tt_op": tt_lib_ops.eltwise_unary_div,
"pytorch_op": pytorch_ops.unary_div,
},
"eltwise-mul_unary": {
"tt_op": tt_lib_ops.eltwise_mul_unary,
"pytorch_op": pytorch_ops.mul_unary,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import (
run_single_pytorch_test,
)
from models.utility_functions import skip_for_grayskull

mem_configs = [
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
Expand All @@ -24,6 +25,7 @@


@pytest.mark.parametrize("accurate_mode", [False, True])
@pytest.mark.parametrize("round_mode", ["None", "trunc", "floor"])
@pytest.mark.parametrize(
"input_shapes",
[
Expand All @@ -36,34 +38,37 @@
"dst_mem_config",
mem_configs,
)
@skip_for_grayskull("#ToDo: GS implementation needs to be done for floor and trunc")
class TestDiv:
def test_run_div(
self,
accurate_mode,
round_mode,
input_shapes,
dst_mem_config,
device,
):
if accurate_mode == False: # If input_b is non-zero tensor
datagen_func = [
generation_funcs.gen_func_with_cast(
partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16
partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16
)
] + [
generation_funcs.gen_func_with_cast(
partial(generation_funcs.gen_rand, low=-100, high=-1), torch.bfloat16
partial(generation_funcs.gen_rand, low=-1e6, high=-1), torch.bfloat16
)
]
else:
datagen_func = [
generation_funcs.gen_func_with_cast(
partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16
partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16
)
] * 2
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args.update(
{
"accurate_mode": accurate_mode,
"round_mode": round_mode,
}
)
test_args.update({"output_mem_config": dst_mem_config})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# SPDX-FileCopyrightText: © 2023-24 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import pytest
import torch
import random
from functools import partial
import tt_lib as ttl


from tests.tt_eager.python_api_testing.sweep_tests import (
comparison_funcs,
generation_funcs,
)
from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import (
run_single_pytorch_test,
)
from models.utility_functions import skip_for_grayskull

# Memory configurations swept by every test case below:
# interleaved tensors placed in DRAM and in L1, respectively.
mem_configs = [
    ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
    ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
]


@pytest.mark.parametrize("accurate_mode", [True])
@pytest.mark.parametrize("round_mode", ["None", "trunc", "floor"])
@pytest.mark.parametrize(
    "input_shapes",
    [
        [[1, 1, 32, 32], [1, 1, 32, 32]],
        [[1, 1, 320, 384], [1, 1, 320, 384]],
        [[1, 3, 320, 384], [1, 3, 320, 384]],
    ],
)
# Fixed, non-zero scalars instead of `{random.uniform(-100, 100) for _ in range(3)}`:
# an unseeded random set gives different parametrize IDs on every collection
# (breaking pytest-xdist, --last-failed, and reproducibility), and a scalar
# near zero would blow the division up and fail the PCC comparison spuriously.
@pytest.mark.parametrize(
    "scalar",
    [-94.5, 3.78, 66.25],
)
@pytest.mark.parametrize(
    "dst_mem_config",
    mem_configs,
)
@skip_for_grayskull("#ToDo: GS implementation needs to be done for floor and trunc")
class TestUnary_Div:
    def test_run_unary_div(
        self,
        accurate_mode,
        round_mode,
        input_shapes,
        scalar,
        dst_mem_config,
        device,
    ):
        """Sweep-test eltwise unary div (tensor / scalar) against the torch reference op."""
        # One generator per entry in `input_shapes`; only the first tensor is
        # consumed by the unary op, the second is required by the sweep harness.
        datagen_func = [
            generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16)
        ] * 2
        test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
        test_args.update(
            {
                "accurate_mode": accurate_mode,
                "round_mode": round_mode,
                "scalar": scalar,
            }
        )
        test_args.update({"output_mem_config": dst_mem_config})
        comparison_func = comparison_funcs.comp_pcc

        run_single_pytorch_test(
            "eltwise-unary_div",
            input_shapes,
            datagen_func,
            comparison_func,
            device,
            test_args,
        )
13 changes: 10 additions & 3 deletions tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,9 +713,10 @@ def silu(x, *args, **kwargs):
return torch.nn.functional.silu(x)


def div(x, y, *args, accurate_mode, **kwargs):
result = torch.div(x, y)
return result
def div(x, y, *args, accurate_mode, round_mode, **kwargs):
    """Torch reference for eltwise div.

    `round_mode` is the string "None" (no rounding), "trunc", or "floor",
    mirroring torch.div's `rounding_mode` values; `accurate_mode` is accepted
    for signature parity with the TT op and does not affect the reference.
    """
    rounding = None if round_mode == "None" else round_mode
    return torch.div(x, y, rounding_mode=rounding)


def div_no_nan(x, y, *args, **kwargs):
Expand All @@ -737,6 +738,12 @@ def div_unary(x, *args, scalar, **kwargs):
return result


def unary_div(x, *args, scalar, accurate_mode, round_mode, **kwargs):
    """Torch reference for eltwise unary div (tensor divided by a scalar).

    "None" means plain division; "trunc"/"floor" are forwarded to torch.div's
    `rounding_mode`. `accurate_mode` exists only for signature parity.
    """
    div_kwargs = {} if round_mode == "None" else {"rounding_mode": round_mode}
    return torch.div(x, scalar, **div_kwargs)


def mul_unary(x, *args, scalar, **kwargs):
result = torch.mul(x, scalar)
return result
Expand Down
23 changes: 22 additions & 1 deletion tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1060,6 +1060,7 @@ def eltwise_div(
y,
*args,
accurate_mode,
round_mode,
device,
dtype,
layout,
Expand All @@ -1069,7 +1070,7 @@ def eltwise_div(
):
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
t2 = ttl.tensor.div(t0, t1, accurate_mode, output_mem_config=output_mem_config)
t2 = ttl.tensor.div(t0, t1, accurate_mode, round_mode, output_mem_config=output_mem_config)

return tt2torch_tensor(t2)

Expand Down Expand Up @@ -1697,6 +1698,26 @@ def eltwise_div_unary(
return tt2torch_tensor(t1)


@setup_host_and_device
def eltwise_unary_div(
    x,
    *args,
    scalar,
    accurate_mode,
    round_mode,
    device,
    dtype,
    layout,
    input_mem_config,
    output_mem_config,
    **kwargs,
):
    """Run ttl.tensor.div of a tensor by a scalar on device, return a torch tensor."""
    tt_input = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
    tt_result = ttl.tensor.div(tt_input, scalar, accurate_mode, round_mode, output_mem_config=output_mem_config)
    return tt2torch_tensor(tt_result)


@setup_host_and_device
def eltwise_mul_unary(
x,
Expand Down
33 changes: 30 additions & 3 deletions tt_eager/tt_dnn/op_library/composite/composite_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -885,11 +885,21 @@ Tensor addcdiv(
return operation::decorate_as_composite(__func__, _addcdiv)(input_a, input_b, input_c, value, output_mem_config);
}

Tensor _div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, const MemoryConfig& output_mem_config) {
Tensor _div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, string round_mode, const MemoryConfig& output_mem_config) {
TT_FATAL((round_mode == "None" || round_mode == "trunc" || round_mode == "floor") && "Incorrect rounding mode (expected 'None', 'trunc', or 'floor')");
Tensor result = ttnn::divide(input_a, input_b);

if(round_mode == "trunc"){
result = trunc(result);
}
else if(round_mode == "floor"){
result = floor(result);
}

if (accurate_mode == false) { // If input_b is non-zero tensor
return result;
}

Tensor t_inf = full_like(input_a, std::numeric_limits<float>::infinity(), output_mem_config);
Tensor t_nan = full_like(input_a, std::nanf(""), output_mem_config);
return where(
Expand All @@ -902,8 +912,25 @@ Tensor _div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, co
result,
output_mem_config);
}
Tensor div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, const MemoryConfig& output_mem_config) {
return operation::decorate_as_composite(__func__, _div)(input_a, input_b, accurate_mode, output_mem_config);
// Public tensor/tensor div entry point: wraps _div so the call is recorded
// as a composite operation (uses __func__, so the name must stay "div").
Tensor div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, string round_mode, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _div)(input_a, input_b, accurate_mode, round_mode, output_mem_config);
}

// Tensor / scalar division with optional rounding.
// round_mode must be "None" (no rounding), "trunc", or "floor".
// NOTE(review): accurate_mode is accepted for interface parity with the
// tensor/tensor overload but is never read here — presumably because a
// compile-time-known scalar divisor needs no zero-divisor fixup; confirm.
Tensor _div_overload(const Tensor& input_a, float scalar, bool accurate_mode, string round_mode, const MemoryConfig& output_mem_config) {
    TT_FATAL((round_mode == "None" || round_mode == "trunc" || round_mode == "floor") && "Incorrect rounding mode (expected 'None', 'trunc', or 'floor')");
    Tensor quotient = div_unary(input_a, scalar);
    if (round_mode == "trunc") {
        return trunc(quotient);
    }
    if (round_mode == "floor") {
        return floor(quotient);
    }
    return quotient;
}
// Public tensor/scalar div entry point: wraps _div_overload as a composite
// op so profiling/reporting sees it under the name "div".
Tensor div(const Tensor& input_a, float scalar, bool accurate_mode, string round_mode, const MemoryConfig& output_mem_config) {
    return operation::decorate_as_composite(__func__, _div_overload)(input_a, scalar, accurate_mode, round_mode, output_mem_config);
}

Tensor _trunc(const Tensor& input, const MemoryConfig& output_mem_config) {
Expand Down
8 changes: 8 additions & 0 deletions tt_eager/tt_dnn/op_library/composite/composite_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,14 @@ Tensor div(
const Tensor& input_a,
const Tensor& input_b,
bool accurate_mode = false,
string round_mode = "None",
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

Tensor div(
const Tensor& input_a,
float scalar,
bool accurate_mode = false,
string round_mode = "None",
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

Tensor div_no_nan(
Expand Down
30 changes: 29 additions & 1 deletion tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1109,10 +1109,11 @@ void TensorModuleCompositeOPs(py::module& m_tensor) {

m_tensor.def(
"div",
&div,
py::overload_cast<const Tensor&, const Tensor&, bool, string, const MemoryConfig&>(&div),
py::arg("input_a").noconvert(),
py::arg("input_b").noconvert(),
py::arg("accurate_mode") = false,
py::arg("round_mode") = "None",
py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
R"doc(
Performs the element-wise division of ``input_a`` by ``input_b``.
Expand All @@ -1128,6 +1129,33 @@ void TensorModuleCompositeOPs(py::module& m_tensor) {
"input_a", "Numerator Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes"
"input_b", "Denominator Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes"
"accurate_mode", "Mode of Implementation", "bool", "default to false", "No"
"round_mode", "Mode of Rounding", "String", "default to None", "No"
"output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No"
)doc");

m_tensor.def(
"div",
py::overload_cast<const Tensor&, float, bool, string, const MemoryConfig&>(&div),
py::arg("input_a").noconvert(),
py::arg("scalar").noconvert(),
py::arg("accurate_mode") = false,
py::arg("round_mode") = "None",
py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
R"doc(
Performs the element-wise division of tensor ``input_a`` by ``scalar`` value.
If scalar value is non-zero, then ``accurate_mode`` can be ``false``; else set ``accurate_mode`` to ``true``.
Input tensor must have BFLOAT16 data type.
Output tensor will have BFLOAT16 data type.
.. csv-table::
:header: "Argument", "Description", "Data type", "Valid range", "Required"
"input_a", "Numerator Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes"
"scalar", "Denominator value", "float", "", "Yes"
"accurate_mode", "Mode of Implementation", "bool", "default to false", "No"
"round_mode", "Mode of Rounding", "String", "default to None", "No"
"output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No"
)doc");

Expand Down

0 comments on commit 986637b

Please sign in to comment.