#8681: Add ceil op
mouliraj-mcw committed Jul 12, 2024
1 parent dcdaad3 commit e3d0f90
Showing 29 changed files with 187 additions and 54 deletions.
2 changes: 2 additions & 0 deletions docs/source/ttnn/ttnn/dependencies/tt_lib.rst
@@ -458,6 +458,8 @@ Tensor elementwise operations

.. autofunction:: tt_lib.tensor.floor

.. autofunction:: tt_lib.tensor.ceil

.. autofunction:: tt_lib.tensor.trunc

.. autofunction:: tt_lib.tensor.frac
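For reference, the new op mirrors torch.ceil element-wise. The sketch below shows the golden semantics; the commented device-side call is a hypothetical written by analogy with the existing floor binding, since the exact signature is not shown in this diff.

```python
import torch

# Golden behaviour the new op is validated against (see pytorch_ops.ceil below):
x = torch.tensor([-1.7, -0.2, 0.0, 2.3])
print(torch.ceil(x))  # tensor([-1., -0., 0., 3.])

# Hypothetical device-side usage, by analogy with tt_lib.tensor.floor:
# import tt_lib as ttl
# output = ttl.tensor.ceil(tt_input, output_mem_config=mem_config)
```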
6 changes: 5 additions & 1 deletion tests/tt_eager/python_api_testing/sweep_tests/op_map.py
@@ -632,6 +632,10 @@
"tt_op": tt_lib_ops.eltwise_floor,
"pytorch_op": pytorch_ops.floor,
},
"eltwise-ceil": {
"tt_op": tt_lib_ops.eltwise_ceil,
"pytorch_op": pytorch_ops.ceil,
},
"eltwise-trunc": {
"tt_op": tt_lib_ops.eltwise_trunc,
"pytorch_op": pytorch_ops.trunc,
@@ -648,7 +652,7 @@
"tt_op": tt_lib_ops.eltwise_unary_floor_div,
"pytorch_op": pytorch_ops.unary_floor_div,
},
"eltwise-_rfloor_div": {
"eltwise-rfloor_div": {
"tt_op": tt_lib_ops.eltwise_rfloor_div,
"pytorch_op": pytorch_ops.rfloor_div,
},
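Each op_map entry pairs a device op with its PyTorch golden; the sweep runner (run_single_pytorch_test in the tests below) generates inputs, runs both sides, and compares them with comp_pcc. A simplified sketch of that pairing, with the runner internals assumed rather than taken from this commit:

```python
# Hypothetical, simplified view of how an entry such as "eltwise-ceil" is used;
# the real plumbing lives in the sweep framework, not in this commit.
def run_entry(entry, torch_input, tt_input, **tt_kwargs):
    golden = entry["pytorch_op"](torch_input)           # e.g. pytorch_ops.ceil -> torch.ceil
    device_out = entry["tt_op"](tt_input, **tt_kwargs)  # e.g. tt_lib_ops.eltwise_ceil
    return golden, device_out                           # later compared via comp_pcc
```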
@@ -45,9 +45,9 @@ def test_run_div_trunc(
device,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16)
] + [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args.update({"output_mem_config": dst_mem_config})
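Widening the random range from ±100 to ±1e6 changes what the bfloat16 inputs look like: with only 7 explicit mantissa bits, bfloat16 cannot store fractional parts beyond a few hundred, so the rounding ops are exercised both where they move values and where they are effectively an identity. That rationale is an inference, not stated in the commit; a plain-torch illustration:

```python
import torch

# bfloat16 keeps 8 exponent bits but only 7 explicit mantissa bits, so the
# representable spacing grows with magnitude and fractions are lost early.
x = torch.tensor([0.7, 100.7, 100000.7], dtype=torch.bfloat16).to(torch.float32)
print(x)              # the largest entry is stored with no fractional part
print(torch.ceil(x))  # ceil only changes the entries that kept a fraction
```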
@@ -585,7 +585,7 @@ def test_run_eltwise_sign_ops(
test_args,
)

@pytest.mark.parametrize("round_off_method", ["floor", "trunc"])
@pytest.mark.parametrize("round_off_method", ["floor", "ceil", "trunc"])
@skip_for_grayskull("#ToDo: GS implementation needs to be done for Floor")
def test_run_eltwise_round_off_ops(
self,
@@ -597,9 +597,7 @@ def test_run_eltwise_round_off_ops(
output_mem_config,
):
datagen_func = [
generation_funcs.gen_func_with_cast(
partial(generation_funcs.gen_rand, low=-1000, high=1000), torch.bfloat16
)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args.update(
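With the parametrization now covering all three rounding modes, the difference only shows up away from exact integers, most visibly on negative inputs:

```python
import torch

x = torch.tensor([-2.5, -0.4, 0.4, 2.5])
print(torch.floor(x))  # tensor([-3., -1., 0., 2.])  rounds toward negative infinity
print(torch.ceil(x))   # tensor([-2., -0., 1., 3.])  rounds toward positive infinity
print(torch.trunc(x))  # tensor([-2., -0., 0., 2.])  rounds toward zero
```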
@@ -26,9 +26,9 @@
@pytest.mark.parametrize(
"input_shapes",
[
[[1, 1, 32, 32], [1, 1, 32, 32]],
[[1, 1, 320, 384], [1, 1, 320, 384]],
[[1, 3, 320, 384], [1, 3, 320, 384]],
[[1, 1, 32, 32]],
[[1, 1, 320, 384]],
[[1, 3, 320, 384]],
],
)
@pytest.mark.parametrize(
@@ -44,7 +44,7 @@ def test_run_frac(
device,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args.update({"output_mem_config": dst_mem_config})
@@ -4,6 +4,7 @@
import pytest
import torch
import random
import numpy as np
from functools import partial
import tt_lib as ttl
from tests.tt_eager.python_api_testing.sweep_tests import (
@@ -29,10 +30,6 @@
[[1, 3, 320, 384], [1, 3, 320, 384]],
],
)
@pytest.mark.parametrize(
"value",
[-5.1, 0.0, 10.9],
)
@pytest.mark.parametrize(
"dst_mem_config",
mem_configs,
@@ -42,17 +39,17 @@ class TestRfloor_div:
def test_run_rfloor_div(
self,
input_shapes,
value,
dst_mem_config,
device,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args.update({"value": value})
test_args.update({"value": random.uniform(-100, 100)})
test_args.update({"output_mem_config": dst_mem_config})
comparison_func = comparison_funcs.comp_pcc

run_single_pytorch_test(
"eltwise-rfloor_div",
input_shapes,
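The scalar is now drawn randomly per run instead of being parametrized. For the golden side, rfloor_div is read here as reversed floor division (scalar dividend, tensor divisor); that reading is inferred from the rdiv/rsub naming convention and is not spelled out in this diff:

```python
import torch

def rfloor_div_golden(x: torch.Tensor, value: float) -> torch.Tensor:
    # Assumed semantics: floor-divide the scalar by each tensor element.
    return torch.floor(value / x)
```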
@@ -4,6 +4,7 @@
import pytest
import torch
import random
import numpy as np
from functools import partial
import tt_lib as ttl
from tests.tt_eager.python_api_testing.sweep_tests import (
@@ -29,10 +30,6 @@
[[1, 3, 320, 384], [1, 3, 320, 384]],
],
)
@pytest.mark.parametrize(
"value",
[-5.1, 0.0, 10.9],
)
@pytest.mark.parametrize(
"dst_mem_config",
mem_configs,
@@ -42,17 +39,17 @@ class TestUnary_Div_Trunc:
def test_run_unary_div_trunc(
self,
input_shapes,
value,
dst_mem_config,
device,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args.update({"value": value})
test_args.update({"value": random.uniform(-100, 100)})
test_args.update({"output_mem_config": dst_mem_config})
comparison_func = comparison_funcs.comp_pcc

run_single_pytorch_test(
"eltwise-unary_div_trunc",
input_shapes,
@@ -4,6 +4,7 @@
import pytest
import torch
import random
import numpy as np
from functools import partial
import tt_lib as ttl
from tests.tt_eager.python_api_testing.sweep_tests import (
@@ -29,10 +30,6 @@
[[1, 3, 320, 384], [1, 3, 320, 384]],
],
)
@pytest.mark.parametrize(
"value",
[-5.1, 0.0, 10.9],
)
@pytest.mark.parametrize(
"dst_mem_config",
mem_configs,
@@ -42,17 +39,17 @@ class TestUnary_Rdiv_Trunc:
def test_run_unary_rdiv_trunc(
self,
input_shapes,
value,
dst_mem_config,
device,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e6, high=1e6), torch.bfloat16)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args.update({"value": value})
test_args.update({"value": random.uniform(-100, 100)})
test_args.update({"output_mem_config": dst_mem_config})
comparison_func = comparison_funcs.comp_pcc

run_single_pytorch_test(
"eltwise-unary_rdiv_trunc",
input_shapes,
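The same single-random-scalar pattern applies here. Based on the composite implementation later in this diff (div_unary(value, input) followed by trunc), the golden can be sketched as:

```python
import torch

def unary_rdiv_trunc_golden(x: torch.Tensor, value: float) -> torch.Tensor:
    # Divide the scalar by the tensor, then truncate toward zero, mirroring
    # _unary_rdiv_trunc in composite_ops.cpp below.
    return torch.trunc(value / x)
```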
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py
@@ -651,6 +651,10 @@ def floor(x, *args, **kwargs):
return torch.floor(x)


def ceil(x, *args, **kwargs):
return torch.ceil(x)


def trunc(x, *args, **kwargs):
return torch.trunc(x)

@@ -2645,6 +2645,7 @@ def unary_op(
transpose_nw = make_unary_op(partial(ttl.tensor.transpose, dim0=0, dim1=-1))
transpose_cw = make_unary_op(partial(ttl.tensor.transpose, dim0=1, dim1=-1))
eltwise_floor = make_unary_op(ttl.tensor.floor)
eltwise_ceil = make_unary_op(ttl.tensor.ceil)
eltwise_trunc = make_unary_op(ttl.tensor.trunc)
eltwise_frac = make_unary_op(ttl.tensor.frac)

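eltwise_ceil reuses the same make_unary_op factory as floor, trunc, and frac. A self-contained analogue of that closure pattern, with the wrapper body assumed (the real helper also handles device placement, dtype, layout, and memory-config plumbing):

```python
import torch

def make_unary_op(op):
    # Close over a single-tensor op and return a wrapper with a uniform
    # calling convention; extra arguments are accepted and ignored here.
    def unary_op(x, *args, **kwargs):
        return op(x)
    return unary_op

# With torch.ceil standing in for ttl.tensor.ceil:
eltwise_ceil = make_unary_op(torch.ceil)
print(eltwise_ceil(torch.tensor([1.2, -3.7])))  # tensor([ 2., -3.])
```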
10 changes: 9 additions & 1 deletion tt_eager/tt_dnn/op_library/composite/composite_ops.cpp
@@ -995,8 +995,10 @@ Tensor trunc(const Tensor& input, const MemoryConfig& output_mem_config) {
}

Tensor _frac(const Tensor& input, const MemoryConfig& output_mem_config) {
auto arch = input.device()->arch();
TT_FATAL(arch == tt::ARCH::WORMHOLE_B0, "Op is only supported on Wormhole");
Tensor trunc_res = trunc(input, output_mem_config);
Tensor result = sub(input, trunc_res, std::nullopt, output_mem_config);
Tensor result = ttnn::subtract(input, trunc_res, std::nullopt, output_mem_config);
return result;
}
Tensor frac(const Tensor& input, const MemoryConfig& output_mem_config) {
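Numerically, the reworked _frac realises the identity frac(x) = x - trunc(x) out of existing device ops (trunc, then ttnn::subtract). A plain-torch check of the identity, which mirrors but does not call the device composite:

```python
import torch

x = torch.tensor([-2.6, -0.3, 0.0, 1.9, 5.5])
frac_composed = x - torch.trunc(x)  # same composition as _frac above
print(frac_composed)                # approximately tensor([-0.6, -0.3, 0.0, 0.9, 0.5])
print(torch.frac(x))                # matches the composed result up to float rounding
```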
@@ -1007,6 +1009,8 @@ Tensor _div_trunc(
const Tensor& input_a,
const Tensor& input_b,
const MemoryConfig& output_mem_config) {
auto arch = input_a.device()->arch();
TT_FATAL(arch == tt::ARCH::WORMHOLE_B0, "Op is only supported on Wormhole");
Tensor result = div(input_a, input_b, true);
return trunc(result);
}
@@ -1021,6 +1025,8 @@ Tensor _div_trunc_overload(
const Tensor& input,
float value,
const MemoryConfig& output_mem_config) {
auto arch = input.device()->arch();
TT_FATAL(arch == tt::ARCH::WORMHOLE_B0, "Op is only supported on Wormhole");
Tensor result = div_unary(input, value);
return trunc(result);
}
@@ -1035,6 +1041,8 @@ Tensor _unary_rdiv_trunc(
float value,
const Tensor& input,
const MemoryConfig& output_mem_config) {
auto arch = input.device()->arch();
TT_FATAL(arch == tt::ARCH::WORMHOLE_B0, "Op is only supported on Wormhole");
Tensor result = div_unary(value, input);
return trunc(result);
}
4 changes: 1 addition & 3 deletions tt_eager/tt_dnn/op_library/composite/composite_ops.hpp
@@ -190,13 +190,11 @@ Tensor fmod(
const Tensor& input_b,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

<<<<<<< HEAD
Tensor trunc(const Tensor& input, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
=======

Tensor frac(
const Tensor& input,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
>>>>>>> #8681: Add frac op

Tensor round(
const Tensor& input,
18 changes: 9 additions & 9 deletions tt_eager/tt_dnn/op_library/conv/conv_op.cpp
@@ -34,10 +34,10 @@ pair<vector<uint32_t>, vector<uint32_t>> compute_conv_activation_as_mm_shape(Sha
// pad height
uint32_t num_rows = (uint32_t) conv_output_h*conv_output_w;
uint32_t act_block_h_datums = act_block_h_ntiles * TILE_HEIGHT;
uint32_t num_rows_padded = (uint32_t) (ceil((double) num_rows / (double) act_block_h_datums ) * act_block_h_datums);
uint32_t num_rows_padded = (uint32_t) (std::ceil((double) num_rows / (double) act_block_h_datums ) * act_block_h_datums);
uint32_t num_cols = conv_activation_shape[3] * filter_h * filter_w;
uint32_t act_block_w_datums = act_block_w_ntiles * TILE_WIDTH;
uint32_t num_cols_padded = (uint32_t) (ceil((double) num_cols / (double) act_block_w_datums ) * act_block_w_datums);
uint32_t num_cols_padded = (uint32_t) (std::ceil((double) num_cols / (double) act_block_w_datums ) * act_block_w_datums);
if(use_fast_reader) {
assert(act_block_w_datums >= conv_activation_shape[3] * filter_w);
num_cols_padded = act_block_w_datums * filter_h;
@@ -218,7 +218,7 @@ operation::ProgramWithCallbacks conv_as_large_bmm_single_core_(const Tensor& a,
uint32_t output_channels_padded_to_tile_width = round_up(output_channels, TILE_WIDTH);
assert(output_channels_padded_to_tile_width <= weight_matrix_width);
uint32_t output_width_num_tiles = output_channels_padded_to_tile_width / TILE_WIDTH;
uint32_t num_blocks_output_w = (uint32_t) ceil((double) output_channels_padded_to_tile_width / (double) weight_block_w_datums);
uint32_t num_blocks_output_w = (uint32_t) std::ceil((double) output_channels_padded_to_tile_width / (double) weight_block_w_datums);
uint32_t last_block_width_datums = (output_channels_padded_to_tile_width % weight_block_w_datums == 0) ? weight_block_w_datums : (output_channels_padded_to_tile_width % weight_block_w_datums);
assert(last_block_width_datums % TILE_WIDTH == 0);
uint32_t output_row_size_bytes = output_channels_padded_to_tile_width * num_bytes_of_df;
@@ -726,7 +726,7 @@ std::pair<vector<uint32_t>, vector<uint32_t>> generate_conv_weight_address_map(
address_map_metadata.push_back(address_map_current_group_dram_address_offset);
address_map_metadata.push_back(address_map_current_group_size);
// Pad 0s in address map buffer to ensure each read address is 32B aligned (32/sizeof(uint32_t) == 8 elements)
uint32_t address_map_current_group_size_padded = (uint32_t) (ceil((double) address_map_current_group_size / (double) 8) * 8);
uint32_t address_map_current_group_size_padded = (uint32_t) (std::ceil((double) address_map_current_group_size / (double) 8) * 8);
if(address_map_current_group_size_padded != address_map_current_group_size) {
assert(address_map_current_group_size_padded > address_map_current_group_size);
address_map.insert(address_map.end(), address_map_current_group_size_padded - address_map_current_group_size, 0);
@@ -764,8 +764,8 @@ std::pair<vector<uint32_t>, vector<uint32_t>> generate_conv_activation_address_m
int conv_output_w = ((conv_input_y - S + (2 * Pad_W)) / V) + 1;
uint32_t matrix_height_unpadded = conv_output_h * conv_output_w;
uint32_t matrix_width_unpadded = conv_input_z * R * S;
uint32_t matrix_height = (uint32_t) (ceil((double) matrix_height_unpadded / (double) act_block_h_datums ) * act_block_h_datums);
uint32_t matrix_width = (uint32_t) (ceil((double) matrix_width_unpadded / (double) act_block_w_datums ) * act_block_w_datums);
uint32_t matrix_height = (uint32_t) (std::ceil((double) matrix_height_unpadded / (double) act_block_h_datums ) * act_block_h_datums);
uint32_t matrix_width = (uint32_t) (std::ceil((double) matrix_width_unpadded / (double) act_block_w_datums ) * act_block_w_datums);

uint32_t num_groups = num_blocks_act_h * num_blocks_act_w * num_blocks_weight_w;
uint32_t channel_stick_size = conv_input_z;
@@ -854,7 +854,7 @@ std::pair<vector<uint32_t>, vector<uint32_t>> generate_conv_activation_address_m
address_map_metadata.push_back(address_map_current_group_dram_address_offset);
address_map_metadata.push_back(address_map_current_group_size);
// Pad 0s in address map buffer to ensure each read address is 32B aligned (32/sizeof(uint32_t) == 8 elements)
uint32_t address_map_current_group_size_padded = (uint32_t) (ceil((double) address_map_current_group_size / (double) 8) * 8);
uint32_t address_map_current_group_size_padded = (uint32_t) (std::ceil((double) address_map_current_group_size / (double) 8) * 8);
if(address_map_current_group_size_padded != address_map_current_group_size) {
assert(address_map_current_group_size_padded > address_map_current_group_size);
address_map.insert(address_map.end(), address_map_current_group_size_padded - address_map_current_group_size, 0);
@@ -903,7 +903,7 @@ std::pair<vector<uint32_t>, vector<uint32_t>> populate_address_map_vectors_for_r
address_map_raw_current_group_start + current_group_size);
address_map_raw_index += current_group_size;
// Pad 0s in address map buffer to ensure each read address is 32B aligned (32/sizeof(uint32_t) == 8 elements)
uint32_t current_group_size_padded = (uint32_t) (ceil((double) current_group_size / (double) 8) * 8);
uint32_t current_group_size_padded = (uint32_t) (std::ceil((double) current_group_size / (double) 8) * 8);
if(current_group_size_padded != current_group_size) {
assert(current_group_size_padded > current_group_size);
address_map.insert(address_map.end(), current_group_size_padded - current_group_size, 0);
@@ -988,7 +988,7 @@ operation::ProgramWithCallbacks conv_as_large_bmm_with_address_map_single_core_(
// it removes the padding done for block width but it doesn't remove padding done for tiled width
uint32_t output_channels_padded_to_tile_width = round_up(output_channels, TILE_WIDTH);
assert(output_channels_padded_to_tile_width <= Wb);
uint32_t num_blocks_output_w = (uint32_t) ceil((double) output_channels_padded_to_tile_width / (double) weight_block_w_datums);
uint32_t num_blocks_output_w = (uint32_t) std::ceil((double) output_channels_padded_to_tile_width / (double) weight_block_w_datums);
uint32_t last_block_width_datums = (output_channels_padded_to_tile_width % weight_block_w_datums == 0) ? weight_block_w_datums : (output_channels_padded_to_tile_width % weight_block_w_datums);
assert(last_block_width_datums % TILE_WIDTH == 0);
uint32_t output_row_size_bytes = output_channels_padded_to_tile_width * num_bytes_of_df;
@@ -262,7 +262,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_(const Tensor& a, cons
uint32_t output_channels_padded_to_tile_width = round_up(output_channels, TILE_WIDTH);
assert(output_channels_padded_to_tile_width <= weight_matrix_width);
uint32_t output_width_num_tiles = output_channels_padded_to_tile_width / TILE_WIDTH;
uint32_t num_blocks_output_w = (uint32_t) ceil((double) output_channels_padded_to_tile_width / (double) weight_block_w_datums);
uint32_t num_blocks_output_w = (uint32_t) std::ceil((double) output_channels_padded_to_tile_width / (double) weight_block_w_datums);
uint32_t last_block_width_datums = (output_channels_padded_to_tile_width % weight_block_w_datums == 0) ? weight_block_w_datums : (output_channels_padded_to_tile_width % weight_block_w_datums);
assert(last_block_width_datums % TILE_WIDTH == 0);

@@ -278,7 +278,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_(const Tensor&
uint32_t output_channels_padded_to_tile_width = round_up(output_channels, TILE_WIDTH);
assert(output_channels_padded_to_tile_width <= weight_matrix_width);
uint32_t output_width_num_tiles = output_channels_padded_to_tile_width / TILE_WIDTH;
uint32_t num_blocks_output_w = (uint32_t) ceil((double) output_channels_padded_to_tile_width / (double) weight_block_w_datums);
uint32_t num_blocks_output_w = (uint32_t) std::ceil((double) output_channels_padded_to_tile_width / (double) weight_block_w_datums);
uint32_t last_block_width_datums = (output_channels_padded_to_tile_width % weight_block_w_datums == 0) ? weight_block_w_datums : (output_channels_padded_to_tile_width % weight_block_w_datums);
assert(last_block_width_datums % TILE_WIDTH == 0);

@@ -442,7 +442,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_impl(
assert(output_channels_padded_to_tile_width <= weight_matrix_width);
uint32_t output_width_num_tiles = output_channels_padded_to_tile_width / TILE_WIDTH;
uint32_t num_blocks_output_w =
(uint32_t)ceil((double)output_channels_padded_to_tile_width / (double)weight_block_w_datums);
(uint32_t)std::ceil((double)output_channels_padded_to_tile_width / (double)weight_block_w_datums);
uint32_t last_block_width_datums = (output_channels_padded_to_tile_width % weight_block_w_datums == 0)
? weight_block_w_datums
: (output_channels_padded_to_tile_width % weight_block_w_datums);
2 changes: 1 addition & 1 deletion tt_eager/tt_dnn/op_library/conv/optimized_conv_op.cpp
@@ -40,7 +40,7 @@ pair<vector<uint32_t>, vector<uint32_t>> compute_opt_conv_activation_as_mm_shape
// pad height
uint32_t num_rows = (uint32_t) batch_size * conv_output_h * conv_output_w;
uint32_t act_block_h_datums = act_block_h_ntiles * TILE_HEIGHT;
uint32_t num_rows_padded = (uint32_t) (ceil((double) num_rows / (double) act_block_h_datums ) * act_block_h_datums);
uint32_t num_rows_padded = (uint32_t) (std::ceil((double) num_rows / (double) act_block_h_datums ) * act_block_h_datums);
uint32_t num_cols = conv_activation_shape[3] * filter_h * filter_w;
uint32_t num_cols_padded = round_up(conv_activation_shape[3] * filter_w, TILE_WIDTH) * filter_h;
return {{1, num_rows_padded, num_cols_padded}, {1, num_rows, num_cols}};