Skip to content

Commit

Permalink
#15647:Update ceil op (#15657)
Browse files Browse the repository at this point in the history
### Ticket
Link to GitHub Issue #15647

### Problem description

- The ceil op ignores values outside the range of int16

### What's changed

- Updated the logic to support fp32

### Profiling Results : Shape used [1, 1, 102400, 32]

Kernel Duration [ns]
- Bfloat16 : 70841
- Float32 : 153649

### Checklist
- [ ] All Post commit CI
  • Loading branch information
mouliraj-mcw authored Dec 12, 2024
1 parent 784b72b commit 1509a6e
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 8 deletions.
19 changes: 19 additions & 0 deletions tests/ttnn/unit_tests/operations/eltwise/test_unary.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,22 @@ def test_unary_floor(input_shapes, device):
golden_tensor = golden_function(in_data1)
output_tensor = ttnn.to_torch(output_tensor)
assert_with_pcc(golden_tensor, output_tensor, 0.999)


@skip_for_grayskull()
@pytest.mark.parametrize(
    "input_shapes",
    (
        (torch.Size([1, 1, 32, 32])),
        (torch.Size([1, 1, 320, 384])),
        (torch.Size([1, 3, 320, 384])),
    ),
)
def test_unary_ceil(input_shapes, device):
    """Check ttnn.ceil on a float32 tiled tensor against the op's golden function."""
    # The sampling interval extends past the int16 limits (+/-32767).
    torch_input = torch.empty(input_shapes, dtype=torch.float32).uniform_(-43566, 43565)
    tt_input = ttnn.from_torch(torch_input, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)

    # Device result, converted back to a torch tensor for comparison.
    tt_result = ttnn.to_torch(ttnn.ceil(tt_input))

    # Host-side reference produced by the golden function registered for ttnn.ceil.
    expected = ttnn.get_golden_function(ttnn.ceil)(torch_input)

    assert_with_pcc(expected, tt_result, 0.999)
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "sfpi.h"
#include "noc_nonblocking_api.h"
#include "limits.h"
#include "ckernel_sfpu_floor.h"

using namespace sfpi;

Expand All @@ -20,7 +21,7 @@ inline void calculate_ceil() {
for (int d = 0; d < ITERATIONS; d++) {
vFloat result = dst_reg[0];
vFloat v = result;
vInt tmp = float_to_int16(result, 0); // TODO: Replace float_to_int16 to float_to_int32 once it is available
vInt tmp = float_to_int16(result, 0);
result = int32_to_float(tmp, 0);
v_if(result < v) { result = result + 1; }
v_endif;
Expand All @@ -31,5 +32,19 @@ inline void calculate_ceil() {
}
}

template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_ceil_float32() {
for (int d = 0; d < ITERATIONS; d++) {
vFloat result = dst_reg[0];
vFloat v = result;
vInt tmp = float_to_int32(result);
result = int32_to_float(tmp, 0);
v_if(result < v) { result = result + 1; }
v_endif;
dst_reg[0] = result;
dst_reg++;
}
}

} // namespace sfpu
} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,9 @@ inline void llk_math_eltwise_unary_sfpu_ceil(uint dst_index, int vector_mode = (
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>(ckernel::sfpu::calculate_ceil<APPROXIMATE>, dst_index, vector_mode);
}

// Low-level math entry point for the float32 ceil kernel.
// Passes ckernel::sfpu::calculate_ceil_float32<APPROXIMATE>, together with
// dst_index and vector_mode (default VectorMode::RC), to
// llk_math_eltwise_unary_sfpu_params<APPROXIMATE>.
template <bool APPROXIMATE>
inline void llk_math_eltwise_unary_sfpu_ceil_float32(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_ceil_float32<APPROXIMATE>, dst_index, vector_mode);
}
} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "sfpi.h"
#include "noc_nonblocking_api.h"
#include "limits.h"
#include "ckernel_sfpu_floor.h"

using namespace sfpi;

Expand All @@ -20,7 +21,7 @@ inline void calculate_ceil() {
for (int d = 0; d < ITERATIONS; d++) {
vFloat result = dst_reg[0];
vFloat v = result;
vInt tmp = float_to_int16(result, 0); // TODO: Replace float_to_int16 to float_to_int32 once it is available
vInt tmp = float_to_int16(result, 0);
result = int32_to_float(tmp, 0);
v_if(result < v) { result = result + 1; }
v_endif;
Expand All @@ -31,5 +32,18 @@ inline void calculate_ceil() {
}
}

template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_ceil_float32() {
for (int d = 0; d < ITERATIONS; d++) {
vFloat result = dst_reg[0];
vFloat v = result;
vInt tmp = float_to_int32(result);
result = int32_to_float(tmp, 0);
v_if(result < v) { result = result + 1; }
v_endif;
dst_reg[0] = result;
dst_reg++;
}
}
} // namespace sfpu
} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,9 @@ inline void llk_math_eltwise_unary_sfpu_ceil(uint dst_index, int vector_mode = (
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>(ckernel::sfpu::calculate_ceil<APPROXIMATE>, dst_index, vector_mode);
}

// Low-level math entry point for the float32 ceil kernel.
// Passes ckernel::sfpu::calculate_ceil_float32<APPROXIMATE>, together with
// dst_index and vector_mode (default VectorMode::RC), to
// llk_math_eltwise_unary_sfpu_params<APPROXIMATE>.
template <bool APPROXIMATE>
inline void llk_math_eltwise_unary_sfpu_ceil_float32(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_ceil_float32<APPROXIMATE>, dst_index, vector_mode);
}
} // namespace ckernel
18 changes: 17 additions & 1 deletion tt_metal/include/compute_kernel_api/eltwise_unary/ceil.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,25 @@ ALWI void ceil_tile_init() { MATH((llk_math_eltwise_unary_sfpu_ceil_init<APPROX>
* | Argument | Description | Type | Valid
* Range | Required |
* |-----------------|----------------------------------------------------------------------------|----------|-------------------------------------------------------|----------|
* | idst | The index of the tile in DST register buffer to modify the sign bit of | uint32_t | Must be
* | idst | The index of the tile in DST register buffer to perform ceil operation | uint32_t | Must be
* less than the size of the DST register buffer | True |
*/
ALWI void ceil_tile(uint32_t idst) { MATH((llk_math_eltwise_unary_sfpu_ceil<APPROX>(idst))); }

/**
* Performs the ceil operation on the tile in the DST register at index idst,
* using the float32 variant of the ceil LLK
* (llk_math_eltwise_unary_sfpu_ceil_float32). The DST register buffer must be in
* acquired state via *acquire_dst* call. This call is blocking and is only
* available on the compute engine.
*
* Return value: None
*
* | Argument | Description                                                               | Type     | Valid Range                                           | Required |
* |----------|---------------------------------------------------------------------------|----------|-------------------------------------------------------|----------|
* | idst     | The index of the tile in DST register buffer to perform ceil operation on | uint32_t | Must be less than the size of the DST register buffer | True     |
*/
ALWI void ceil_tile_float32(uint32_t idst) { MATH((llk_math_eltwise_unary_sfpu_ceil_float32<APPROX>(idst))); }

} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ enum class UnaryOpType {
FLOOR,
FLOOR_FLOAT32,
CEIL,
CEIL_FLOAT32,
LEFT_SHIFT,
REMAINDER,
FMOD,
Expand Down
10 changes: 7 additions & 3 deletions ttnn/cpp/ttnn/operations/eltwise/unary/common/unary_op_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ void update_macro_defines(UnaryOpType op_type, std::map<std::string, std::string
case UnaryOpType::IDENTITY_UINT32: defines["SFPU_OP_IDENTITY_INCLUDE"] = "1"; break;
case UnaryOpType::FLOOR:
case UnaryOpType::FLOOR_FLOAT32: defines["SFPU_OP_FLOOR_INCLUDE"] = "1"; break;
case UnaryOpType::CEIL:
case UnaryOpType::CEIL_FLOAT32: defines["SFPU_OP_CEIL_INCLUDE"] = "1"; break;
case UnaryOpType::RDIV: break;
case UnaryOpType::RSUB: defines["SFPU_OP_REVERSE_FAMILY_INCLUDE"] = "1";
case UnaryOpType::ISINF:
Expand All @@ -73,7 +75,6 @@ void update_macro_defines(UnaryOpType op_type, std::map<std::string, std::string
case UnaryOpType::BITWISE_AND: defines["SFPU_OP_BITWISE_AND_INCLUDE"] = "1"; break;
case UnaryOpType::BITWISE_OR: defines["SFPU_OP_BITWISE_OR_INCLUDE"] = "1"; break;
case UnaryOpType::RIGHT_SHIFT: defines["SFPU_OP_RIGHT_SHIFT_INCLUDE"] = "1"; break;
case UnaryOpType::CEIL: defines["SFPU_OP_CEIL_INCLUDE"] = "1"; break;
case UnaryOpType::LEFT_SHIFT: defines["SFPU_OP_LEFT_SHIFT_INCLUDE"] = "1"; break;
case UnaryOpType::REMAINDER: defines["SFPU_OP_REMAINDER_INCLUDE"] = "1"; break;
case UnaryOpType::FMOD: defines["SFPU_OP_FMOD_INCLUDE"] = "1"; break;
Expand Down Expand Up @@ -282,7 +283,6 @@ std::pair<string, string> get_op_init_and_func_default(UnaryOpType op_type, std:
case UnaryOpType::SIGNBIT:
op_init_and_name = {"signbit_tile_init();", fmt::format("signbit_tile({});", idst)};
break;
case UnaryOpType::CEIL: op_init_and_name = {"ceil_tile_init();", fmt::format("ceil_tile({});", idst)}; break;
case UnaryOpType::SIN: op_init_and_name = {"sin_tile_init();", fmt::format("sin_tile({});", idst)}; break;
case UnaryOpType::COS: op_init_and_name = {"cos_tile_init();", fmt::format("cos_tile({});", idst)}; break;
case UnaryOpType::ISFINITE:
Expand Down Expand Up @@ -344,7 +344,11 @@ std::pair<string, string> get_op_init_and_func_default(UnaryOpType op_type, std:
op_init_and_name = {"floor_tile_init();", fmt::format("floor_tile({});", idst)};
break;
case UnaryOpType::FLOOR_FLOAT32:
op_init_and_name = {"floor_tile_init();", fmt::format("floor_tile_float32({});", idst)}; break;
op_init_and_name = {"floor_tile_init();", fmt::format("floor_tile_float32({});", idst)};
break;
case UnaryOpType::CEIL: op_init_and_name = {"ceil_tile_init();", fmt::format("ceil_tile({});", idst)}; break;
case UnaryOpType::CEIL_FLOAT32:
op_init_and_name = {"ceil_tile_init();", fmt::format("ceil_tile_float32({});", idst)};
break;
case UnaryOpType::RELU6:
op_init_and_name = {"relu_max_tile_init();", fmt::format("relu_max_tile({}, 0x40c00000u);", idst)};
Expand Down
27 changes: 26 additions & 1 deletion ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ template struct ExecuteUnary<UnaryOpType::ERFINV>;
template struct ExecuteUnary<UnaryOpType::EXP2>;
template struct ExecuteUnary<UnaryOpType::EXPM1>;
template struct ExecuteUnary<UnaryOpType::EQZ>;
template struct ExecuteUnary<UnaryOpType::CEIL>;
template struct ExecuteUnary<UnaryOpType::GEZ>;
template struct ExecuteUnary<UnaryOpType::GTZ>;
template struct ExecuteUnary<UnaryOpType::I0>;
Expand Down Expand Up @@ -362,6 +361,32 @@ Tensor Floor::invoke(
DefaultQueueId, input_tensor, {UnaryWithParam{op_type}}, memory_config, optional_output_tensor);
}

// Runs ceil on input_tensor using the given command queue.
// FLOAT32 inputs are dispatched as UnaryOpType::CEIL_FLOAT32; every other dtype
// is dispatched as UnaryOpType::CEIL. The chosen op is handed to
// detail::unary_impl along with the memory config and optional output tensor.
Tensor Ceil::invoke(
    uint8_t queue_id,
    const Tensor& input_tensor,
    const std::optional<MemoryConfig>& memory_config,
    const std::optional<Tensor>& optional_output_tensor) {
    const bool is_float32 = input_tensor.get_dtype() == DataType::FLOAT32;
    UnaryOpType selected_op = is_float32 ? UnaryOpType::CEIL_FLOAT32 : UnaryOpType::CEIL;

    return detail::unary_impl(
        queue_id, input_tensor, {UnaryWithParam{selected_op}}, memory_config, optional_output_tensor);
}

// Runs ceil on input_tensor using DefaultQueueId.
// FLOAT32 inputs are dispatched as UnaryOpType::CEIL_FLOAT32; every other dtype
// is dispatched as UnaryOpType::CEIL. The chosen op is handed to
// detail::unary_impl along with the memory config and optional output tensor.
Tensor Ceil::invoke(
    const Tensor& input_tensor,
    const std::optional<MemoryConfig>& memory_config,
    const std::optional<Tensor>& optional_output_tensor) {
    const bool is_float32 = input_tensor.get_dtype() == DataType::FLOAT32;
    UnaryOpType selected_op = is_float32 ? UnaryOpType::CEIL_FLOAT32 : UnaryOpType::CEIL;

    return detail::unary_impl(
        DefaultQueueId, input_tensor, {UnaryWithParam{selected_op}}, memory_config, optional_output_tensor);
}

Tensor Dropout::invoke(
const Tensor& input,
const uint32_t seed,
Expand Down
14 changes: 13 additions & 1 deletion ttnn/cpp/ttnn/operations/eltwise/unary/unary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,18 @@ struct Floor {
const std::optional<Tensor>& optional_output_tensor = std::nullopt);
};

// Operation struct for ttnn::ceil. Declares two invoke overloads: one taking an
// explicit queue id and one without it. Both accept an optional memory config
// and an optional preallocated output tensor (each defaulting to std::nullopt).
struct Ceil {
// Overload with an explicit command queue id.
static Tensor invoke(
uint8_t queue_id,
const Tensor& input_tensor,
const std::optional<MemoryConfig>& memory_config = std::nullopt,
const std::optional<Tensor>& optional_output_tensor = std::nullopt);

// Overload without a queue id argument.
static Tensor invoke(
const Tensor& input_tensor,
const std::optional<MemoryConfig>& memory_config = std::nullopt,
const std::optional<Tensor>& optional_output_tensor = std::nullopt);
};
struct Dropout {
static Tensor invoke(
const Tensor& input,
Expand Down Expand Up @@ -294,7 +306,6 @@ REGISTER_UNARY_OPERATION(erfinv, ERFINV);
REGISTER_UNARY_OPERATION(exp2, EXP2);
REGISTER_UNARY_OPERATION(expm1, EXPM1);
REGISTER_UNARY_OPERATION(eqz, EQZ);
REGISTER_UNARY_OPERATION(ceil, CEIL);
REGISTER_UNARY_OPERATION(gez, GEZ);
REGISTER_UNARY_OPERATION(gtz, GTZ);
REGISTER_UNARY_OPERATION(i0, I0);
Expand Down Expand Up @@ -368,6 +379,7 @@ constexpr auto identity =
ttnn::register_operation_with_auto_launch_op<"ttnn::identity", ttnn::operations::unary::Identity>();
constexpr auto floor =
ttnn::register_operation_with_auto_launch_op<"ttnn::floor", ttnn::operations::unary::Floor>();
constexpr auto ceil = ttnn::register_operation_with_auto_launch_op<"ttnn::ceil", ttnn::operations::unary::Ceil>();
constexpr auto softplus =
ttnn::register_operation_with_auto_launch_op<"ttnn::softplus", ttnn::operations::unary::Softplus>();
constexpr auto prelu_sfpu =
Expand Down

0 comments on commit 1509a6e

Please sign in to comment.