diff --git a/bangc-ops/kernels/ml_nms/ml_nms.cpp b/bangc-ops/kernels/ml_nms/ml_nms.cpp
new file mode 100644
index 000000000..132840513
--- /dev/null
+++ b/bangc-ops/kernels/ml_nms/ml_nms.cpp
@@ -0,0 +1,152 @@
+/*************************************************************************
+* Copyright (C) [2022] by Cambricon, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be included
+* in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+************************************************************************/
+#include <cstdint>
+#include <string>
+#include "core/context.h"
+#include "core/gen_case.h"
+#include "core/logging.h"
+#include "core/runtime/device.h"
+#include "core/tensor.h"
+#include "core/type.h"
+#include "kernels/kernel.h"
+#include "mlu_op_kernel.h"
+#include "mlu_op.h"
+#include "cnrt.h"
+#include "cndev.h"
+
+// Returns true when check_type matches one of the first len entries of
+// support_type; returns false otherwise, including when len <= 0.
+static inline bool isSupportType(const mluOpDataType_t check_type,
+                                 const mluOpDataType_t support_type[],
+                                 const int len) {
+  for (int i = 0; i < len; ++i) {
+    if (check_type == support_type[i]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Checks the common ml_nms arguments: x_desc, handle and x go through
+// PARAM_CHECK against NULL, and x_desc->dtype must be listed in support_type.
+// Returns MLUOP_STATUS_BAD_PARAM for an unsupported data type and
+// MLUOP_STATUS_SUCCESS once every check has passed.
+mluOpStatus_t MlNmsParamCheck(
+    const std::string &op_name, const mluOpHandle_t &handle,
+    const mluOpTensorDescriptor_t &x_desc, const void *x,
+    const mluOpDataType_t support_type[], const int &len) {
+  PARAM_CHECK(op_name, x_desc != NULL);
+  PARAM_CHECK(op_name, handle != NULL);
+
+  // check data type
+  if (!isSupportType(x_desc->dtype, support_type, len)) {
+    LOG(ERROR) << op_name << ":x_desc's data type is not supported.";
+    return MLUOP_STATUS_BAD_PARAM;
+  }
+  PARAM_CHECK(op_name, x != NULL);
+  return MLUOP_STATUS_SUCCESS;
+}
+
+// Chooses the launch configuration: a UNION1 task with x set to
+// handle->core_num_per_cluster and y set to getClusterLimitCapability(handle).
+// Inputs with at most 2048 elements use y = 1.
+static void policyFunc(const mluOpHandle_t &handle,
+                       const mluOpTensorDescriptor_t desc, cnrtDim3_t *k_dim,
+                       cnrtFunctionType_t *k_type) {
+  const size_t dim = mluOpGetTensorElementNum(desc);
+  // Union1 policyFunc
+  *k_type = CNRT_FUNC_TYPE_UNION1;
+  k_dim->x = handle->core_num_per_cluster;
+  k_dim->y = mluop::runtime::getClusterLimitCapability(handle);
+  k_dim->z = 1;
+  // A case with at most 2048 elements works best on a single cluster.
+  const size_t small_case_thread = 2048;
+  if (dim <= small_case_thread) k_dim->y = 1;
+}
+
+// Entry point of ml_nms: validates the arguments, picks the half or float
+// kernel and launches it on handle->queue.
+//   boxes_data_ptr_desc / boxes_data_ptr : input boxes, 6 elements per box.
+//   iou_threshold                        : threshold forwarded to the kernel.
+//   output_boxes_index                   : output buffer, written as uint8_t.
+// Returns the status of the parameter check on failure, otherwise
+// MLUOP_STATUS_SUCCESS.
+mluOpStatus_t MLUOP_WIN_API mluOpMlNms(mluOpHandle_t handle,
+    const mluOpTensorDescriptor_t boxes_data_ptr_desc, void* boxes_data_ptr,
+    float iou_threshold, void* output_boxes_index) {
+  mluOpDataType_t support_type[2] = {MLUOP_DTYPE_HALF, MLUOP_DTYPE_FLOAT};
+  mluOpStatus_t param_check = MlNmsParamCheck(
+      "[mluOpMlNms]", handle, boxes_data_ptr_desc, boxes_data_ptr,
+      support_type, 2);
+  if (param_check != MLUOP_STATUS_SUCCESS) {
+    return param_check;
+  }
+  // The kernels write their flags through this pointer, so reject NULL here.
+  PARAM_CHECK("[mluOpMlNms]", output_boxes_index != NULL);
+
+  cnrtDim3_t k_dim;
+  cnrtFunctionType_t k_type;
+  policyFunc(handle, boxes_data_ptr_desc, &k_dim, &k_type);
+
+  void (*mluOpFuncKernel)(cnrtDim3_t k_dim, cnrtFunctionType_t k_type,
+      cnrtQueue_t queue, mluOpDataType_t data_type, void* boxes_data_ptr,
+      float nmsThres, int input_boxes_num, int boxes_start_position,
+      uint8_t* output_boxes_index);
+  size_t elem_bytes = 0;
+  if (boxes_data_ptr_desc->dtype == MLUOP_DTYPE_HALF) {
+    mluOpFuncKernel = mluOpKernelMlNmsHalfFast;
+    elem_bytes = 2;
+  } else {
+    mluOpFuncKernel = mluOpKernelMlNmsFloatFast;
+    elem_bytes = 4;
+  }
+
+  const int input_boxes_num = boxes_data_ptr_desc->total_element_num / 6;
+  // Per box: 6 input elements plus 14 working elements. Computed in size_t so
+  // that a large box count cannot overflow a 32-bit int.
+  const size_t apply_nram_size =
+      static_cast<size_t>(input_boxes_num) * (6 + 14) * elem_bytes;
+  const size_t nram_limit = MAX_NRAM_SIZE;
+  // One launch when the data fits in nram_limit, otherwise ceil(size / limit).
+  int launch_num = 1;
+  if (apply_nram_size > nram_limit) {
+    launch_num =
+        static_cast<int>((apply_nram_size + nram_limit - 1) / nram_limit);
+  }
+
+  // NOTE(review): every launch receives the full input_boxes_num and only a
+  // different start position; confirm the kernel handles the split as intended.
+  for (int i = 0; i < launch_num; i++) {
+    const int boxes_start_position = i * (input_boxes_num / launch_num);
+    KERNEL_CHECK((mluOpFuncKernel(k_dim, k_type, handle->queue,
+                                  boxes_data_ptr_desc->dtype,
+                                  boxes_data_ptr,
+                                  iou_threshold,
+                                  input_boxes_num,
+                                  boxes_start_position,
+                                  (uint8_t*)output_boxes_index)));
+  }
+  GEN_CASE_END();
+
+  return MLUOP_STATUS_SUCCESS;
+}
diff --git a/bangc-ops/kernels/ml_nms/ml_nms.mlu b/bangc-ops/kernels/ml_nms/ml_nms.mlu
new file mode 100644
index 000000000..ee0fbcc4c
--- /dev/null
+++ b/bangc-ops/kernels/ml_nms/ml_nms.mlu
@@ -0,0 +1,252 @@
+/*************************************************************************
+* Copyright (C) [2022] by Cambricon, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be included
+* in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+************************************************************************/
+#include "bang.h"
+#include "mlu_op_kernel.h"
+#include "kernels/kernel.h"
+
+#define NRAM_SIZE (2 * 1024)
+#define UNION_OP_KERNEL_DECLARE(Op, DType, Prefer) \
+  __mlu_global__ void MLUBlockKernel##Op##DType##Prefer( \
+      mluOpDataType_t data_type, void* boxes_data_ptr, \
+      float nms_thres, int input_boxes_num, int boxes_start_position, \
+      uint8_t* output_boxes_index);
+
+// Defines the kernel entry MLUOpKernel<Op><DType><Prefer>: it computes this
+// task's box offset and box count, then runs compute<Op><Prefer> via unionImple.
+#define UNION_OP_KERNEL_IMPLE(Op, DType, Prefer) \
+  __mlu_global__ void MLUOpKernel##Op##DType##Prefer( \
+      mluOpDataType_t data_type, void* boxes_data_ptr, \
+      float nms_thres, int input_boxes_num, int boxes_start_position, \
+      uint8_t* output_boxes_index) { \
+    int offset, seg; \
+    getOffsetNum##Op##Prefer(input_boxes_num, &offset); \
+    getSegNumMlNmsFast(input_boxes_num, &seg); \
+    unionImple<DType, compute##Op##Prefer<DType>>( \
+        (DType*)boxes_data_ptr, (DType)nms_thres, \
+        offset, seg, input_boxes_num, boxes_start_position, output_boxes_index);}
+
+// Copies this task's seg boxes (6 values each) into NRAM, appends a copy of the
+// first input box as the reference box, then calls OpFunc on that buffer.
+// NOTE(review): the buffer holds MAX_NRAM_SIZE / 16 bytes and nothing here
+// checks that seg fits; confirm the caller bounds input_boxes_num.
+template <typename T, void (*OpFunc)(T*, T, int, int, int, uint8_t*)>
+__mlu_device__ void unionImple(T* boxes_data_ptr,
+    T nms_thres, int offset, int seg, int input_boxes_num,
+    int boxes_start_position, uint8_t* output_boxes_index) {
+  __nram__ char worke_space[MAX_NRAM_SIZE / 16];
+  __memcpy((T*)worke_space,
+           boxes_data_ptr + ((boxes_start_position + offset) * 6),
+           seg * 6 * sizeof(T), GDRAM2NRAM);
+  __memcpy((T*)worke_space + (seg * 6), boxes_data_ptr,
+           6 * sizeof(T), GDRAM2NRAM);
+  OpFunc((T*)worke_space, nms_thres, input_boxes_num, offset,
+         seg, output_boxes_index);
+}
+
+// Rounds seg up so that seg * elem_byte is a multiple of 64 bytes when
+// __BANG_ARCH__ < 200 and of 128 bytes when it is < 300; otherwise keeps seg.
+__mlu_func__ void getComputeLen(int seg, int elem_byte, int* compute_len) {
+#if (__BANG_ARCH__ < 200)
+  *compute_len = (seg * elem_byte % 64) == 0 ?
+      seg : (seg * elem_byte / 64 + 1) * 64 / elem_byte;
+#elif (__BANG_ARCH__ < 300)
+  *compute_len = (seg * elem_byte % 128) == 0 ?
+      seg : (seg * elem_byte / 128 + 1) * 128 / elem_byte;
+#else
+  *compute_len = seg;
+#endif
+}
+
+// First box index owned by this task: the first (input_boxes_num % taskDim)
+// tasks take one extra box each, so a single task starts at box 0.
+__mlu_func__ void getOffsetNumMlNmsFast(int input_boxes_num, int* offset) {
+  *offset = (input_boxes_num % taskDim) > taskId ?
+      (input_boxes_num / taskDim + 1) * taskId :
+      (input_boxes_num / taskDim) * taskId + (input_boxes_num % taskDim);
+}
+
+// Number of boxes owned by this task (see getOffsetNumMlNmsFast).
+__mlu_func__ void getSegNumMlNmsFast(int input_boxes_num, int* seg) {
+  *seg = (input_boxes_num / taskDim) +
+         uint32_t((input_boxes_num % taskDim) > taskId);
+}
+
+// For each of this task's seg boxes, writes 1 when the box shares the reference
+// box's value at index 5 and inter <= nms_thres * (area + ref_area - inter),
+// else 0; the uint8 flags are copied to output_boxes_index + offset.
+template <typename T>
+__mlu_func__ void computeMlNmsFast(T* worke_space,
+    T nms_thres, int input_boxes_num, int offset,
+    int seg, uint8_t* output_boxes_index) {
+  __nram__ T scores_max_boxes_area;
+  __nram__ T w_s, h_s;
+  __nram__ T* scores_max_boxes;
+  __nram__ T* x1;
+  __nram__ T* y1;
+  __nram__ T* x2;
+  __nram__ T* y2;
+  __nram__ T* w;
+  __nram__ T* h;
+  __nram__ T* area_ptr;
+  __nram__ T* inter_area_ptr;
+  __nram__ T* scores_max_boxes_area_ptr;
+  __nram__ T* nms_thres_ptr;
+  __nram__ T* scores_max_boxes_ptr;
+  __nram__ T* tem;
+  __nram__ uint8_t* similar_index;
+  __nram__ uint8_t* result;
+  int compute_len;
+  int data_len = seg * 6 + 6;
+
+  // ----------------------allocate memory---------------------
+  getComputeLen(seg, sizeof(T), &compute_len);
+  scores_max_boxes = worke_space + (seg * 6);
+  x1 = worke_space + data_len;
+  y1 = worke_space + (data_len + compute_len);
+  x2 = worke_space + (data_len + (compute_len * 2));
+  y2 = worke_space + (data_len + (compute_len * 3));
+  data_len = data_len + (compute_len * 4);
+  w = worke_space + data_len;
+  h = worke_space + (data_len + compute_len);
+  area_ptr = worke_space + (data_len + (compute_len * 2));
+  inter_area_ptr = worke_space + (data_len + (compute_len * 3));
+  scores_max_boxes_area_ptr = worke_space + (data_len + (compute_len * 4));
+  nms_thres_ptr = worke_space + (data_len + (compute_len * 5));
+  scores_max_boxes_ptr = worke_space + (data_len + (compute_len * 6));
+  tem = worke_space + (data_len + (compute_len * 7));
+  if (sizeof(T) == sizeof(uint8_t)) {
+    similar_index = (uint8_t*)worke_space + (data_len + (compute_len * 8));
+    result = (uint8_t*)worke_space + (data_len + (compute_len * 8) + seg);
+  } else {
+    similar_index = (uint8_t*)worke_space + ((data_len + (compute_len * 8)) *
+                    (sizeof(T) / sizeof(uint8_t)));
+    result = (uint8_t*)worke_space + ((data_len + (compute_len * 8)) *
+             (sizeof(T) / sizeof(uint8_t)) + seg);
+  }
+  for (int i = 0, j = 0; i < seg * 6; i += 6, j++) {
+    if (*(scores_max_boxes + 5) == worke_space[i + 5]) {
+      similar_index[j] = 1;
+      x1[j] = worke_space[i];
+      y1[j] = worke_space[i + 1];
+      x2[j] = worke_space[i + 2];
+      y2[j] = worke_space[i + 3];
+    } else {
+      similar_index[j] = 0;
+      x1[j] = 0.0;
+      y1[j] = 0.0;
+      x2[j] = 0.0;
+      y2[j] = 0.0;
+    }
+  }
+
+  // -----------------iou detect--------------------
+  // find all boxes area
+  __bang_sub(h, y1, y2, compute_len);
+  __bang_sub(w, x2, x1, compute_len);
+  __bang_mul(area_ptr, h, w, compute_len);
+
+  // max x1
+  __bang_write_value(scores_max_boxes_ptr, compute_len, scores_max_boxes[0]);
+  __bang_cycle_sub(x1, x1, scores_max_boxes_ptr, compute_len, compute_len);
+  __bang_active_relu(x1, x1, compute_len);
+  __bang_cycle_add(x1, x1, scores_max_boxes_ptr, compute_len, compute_len);
+
+  // min y1
+  __bang_write_value(scores_max_boxes_ptr, compute_len, scores_max_boxes[1]);
+  __bang_write_zero(tem, compute_len);
+  __bang_cycle_add(tem, tem, scores_max_boxes_ptr, compute_len, compute_len);
+  __bang_sub(tem, y1, scores_max_boxes_ptr, compute_len);
+  __bang_active_relu(tem, tem, compute_len);
+  __bang_sub(y1, y1, tem, compute_len);
+
+  // min x2
+  __bang_write_value(scores_max_boxes_ptr, compute_len, scores_max_boxes[2]);
+  __bang_write_zero(tem, compute_len);
+  __bang_cycle_add(tem, tem, scores_max_boxes_ptr, compute_len, compute_len);
+  __bang_sub(tem, x2, scores_max_boxes_ptr, compute_len);
+  __bang_active_relu(tem, tem, compute_len);
+  __bang_sub(x2, x2, tem, compute_len);
+
+  // max y2
+  __bang_write_value(scores_max_boxes_ptr, compute_len, scores_max_boxes[3]);
+  __bang_cycle_sub(y2, y2, scores_max_boxes_ptr, compute_len, compute_len);
+  __bang_active_relu(y2, y2, compute_len);
+  __bang_cycle_add(y2, y2, scores_max_boxes_ptr, compute_len, compute_len);
+
+  // --------- intersection -------
+  // find W
+  __bang_sub(w, x2, x1, compute_len);
+  __bang_active_relu(w, w, compute_len);
+
+  // find H
+  __bang_sub(h, y1, y2, compute_len);
+  __bang_active_relu(h, h, compute_len);
+
+  // find intersection
+  __bang_mul(inter_area_ptr, h, w, compute_len);
+
+  // find scores max boxes area
+  w_s = scores_max_boxes[2] - scores_max_boxes[0];
+  h_s = scores_max_boxes[1] - scores_max_boxes[3];
+  scores_max_boxes_area = w_s * h_s;
+
+  __bang_write_value(scores_max_boxes_area_ptr, compute_len,
+                     scores_max_boxes_area);
+  __bang_cycle_add(tem, area_ptr, scores_max_boxes_area_ptr,
+                   compute_len, compute_len);
+  __bang_sub(tem, tem, inter_area_ptr, compute_len);
+  __bang_write_value(nms_thres_ptr, compute_len, nms_thres);
+  __bang_cycle_mul(tem, tem, nms_thres_ptr, compute_len, compute_len);
+
+  __bang_le(tem, inter_area_ptr, tem, compute_len);
+
+  for (int i = 0; i < seg; i++) {
+    result[i] = (tem[i] && similar_index[i]) ? 1 : 0;
+  }
+  __memcpy(output_boxes_index + offset, result, seg * sizeof(uint8_t),
+           NRAM2GDRAM);
+}
+
+UNION_OP_KERNEL_IMPLE(MlNms, float, Fast);
+UNION_OP_KERNEL_IMPLE(MlNms, half, Fast);
+
+// Host-side launchers: start the float / half kernel with k_dim, k_type, queue.
+void MLUOP_WIN_API mluOpKernelMlNmsFloatFast(
+    cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue,
+    mluOpDataType_t data_type, void* boxes_data_ptr,
+    float nms_thres, int input_boxes_num, int boxes_start_position,
+    uint8_t* output_boxes_index) {
+  MLUOpKernelMlNmsfloatFast<<<k_dim, k_type, queue>>>(
+      data_type, boxes_data_ptr, nms_thres,
+      input_boxes_num, boxes_start_position, output_boxes_index);
+}
+
+void MLUOP_WIN_API mluOpKernelMlNmsHalfFast(
+    cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue,
+    mluOpDataType_t data_type, void* boxes_data_ptr,
+    float nms_thres, int input_boxes_num, int boxes_start_position,
+    uint8_t* output_boxes_index) {
+  MLUOpKernelMlNmshalfFast<<<k_dim, k_type, queue>>>(
+      data_type, boxes_data_ptr, nms_thres,
+      input_boxes_num, boxes_start_position, output_boxes_index);
+}
diff --git a/bangc-ops/mlu_op.h b/bangc-ops/mlu_op.h
index 2efc51d33..841e36edd 100755
--- a/bangc-ops/mlu_op.h
+++ b/bangc-ops/mlu_op.h
@@ -1464,6 +1464,30 @@ mluOpGetTensorAndDataFromTensorSet(mluOpTensorSetDescriptor_t tensorSetDesc,
                                    mluOpTensorDescriptor_t *tensorDesc,
                                    void **dataAddrInDevice);
 
+/*
+ * Flags every input box whose IoU with the first box is at most
+ * iou_threshold and whose element 5 equals that of the first box.
+ *
+ * @param handle : Handle that supplies the queue used for the kernel launch
+ *
+ * @param boxes_data_ptr_desc : Descriptor of the input boxes (half or float)
+ *
+ * @param boxes_data_ptr : Device pointer to the boxes, 6 elements per box
+ *
+ * @param iou_threshold : Threshold of intersection and union ratio
+ *
+ * @param output_boxes_index : Device pointer that receives one uint8 flag per box
+ *
+ * @return MLUOP_STATUS_BAD_PARAM if the data type is not half or float
+ *
+ */
+
+mluOpStatus_t MLUOP_WIN_API
+mluOpMlNms(mluOpHandle_t handle,
+           const mluOpTensorDescriptor_t boxes_data_ptr_desc,
+           void* boxes_data_ptr,
+           float iou_threshold,
+           void* output_boxes_index);
 // Group:Abs
 /*!
* @brief Computes the absolute value for every element of the input tensor \b x diff --git a/bangc-ops/mlu_op_kernel.h b/bangc-ops/mlu_op_kernel.h index d211a7e74..58be8e84c 100644 --- a/bangc-ops/mlu_op_kernel.h +++ b/bangc-ops/mlu_op_kernel.h @@ -25,6 +25,7 @@ #include #include "cnrt.h" +#include "mlu_op.h" #ifndef MLUOP_WIN_API #ifdef _WIN32 @@ -38,6 +39,17 @@ extern "C" { #endif // __cplusplus +/* ml_nms */ +void MLUOP_WIN_API mluOpKernelMlNmsFloatFast(cnrtDim3_t k_dim, + cnrtFunctionType_t k_type, cnrtQueue_t queue, mluOpDataType_t data_type, + void* boxes_data_ptr, float nms_thres, int input_boxes_num, + int boxes_start_position, uint8_t* output_boxes_index); + +void MLUOP_WIN_API mluOpKernelMlNmsHalfFast(cnrtDim3_t k_dim, + cnrtFunctionType_t k_type, cnrtQueue_t queue, mluOpDataType_t data_type, + void* boxes_data_ptr, float nms_thres, int input_boxes_num, + int boxes_start_position, uint8_t* output_boxes_index); + /* Abs */ void MLUOP_WIN_API mluOpBlockKernel3StagePipelineAbsHalfFast( cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/mlu_op_test_proto/mlu_op_test.proto b/bangc-ops/test/mlu_op_gtest/pb_gtest/mlu_op_test_proto/mlu_op_test.proto index b90f02a0e..9714f6b04 100755 --- a/bangc-ops/test/mlu_op_gtest/pb_gtest/mlu_op_test_proto/mlu_op_test.proto +++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/mlu_op_test_proto/mlu_op_test.proto @@ -220,6 +220,7 @@ message Node { optional PsamaskForwardParam psamask_forward_param = 134658; // PsamaskForwardParam optional PsamaskBackwardParam psamask_backward_param = 134659; // PsamaskBackwardParam optional VoxelizationParam voxelization_param = 6564; // Voxelizationparam + optional MlNmsParam ml_nms_param = 4020; // MlNmsParam } @@ -418,3 +419,8 @@ message VoxelizationParam { optional int32 ndim = 3 [default = 3]; optional bool deterministic = 4 [default = true]; } + +// param to call mluOpMlNms() +message MlNmsParam { + required float iou_threshold = 1 
[default = 0.2]; +} diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/ml_nms.cpp b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/ml_nms.cpp new file mode 100644 index 000000000..c71d3b2f7 --- /dev/null +++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/ml_nms.cpp @@ -0,0 +1,103 @@ +/******************************************************************************* +* Copyright (C) [2022] by Cambricon, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*******************************************************************************/ +#include "ml_nms.h" +#include +#include + +namespace mluoptest { + +void MlNmsExecutor::paramCheck() { + if (!parser_->getProtoNode()->has_ml_nms_param()) { + LOG(ERROR) << "Lose ml_nms_param. "; + } + GTEST_CHECK(parser_->inputs().size() == 1, + "[MlNmsExecutor] input number is wrong. "); + GTEST_CHECK(parser_->outputs().size() == 1, + "[MlNmsExecutor] output number is wrong. 
"); +} + +void MlNmsExecutor::compute() { + float iou_threshold = + parser_->getProtoNode()->ml_nms_param().iou_threshold(); + VLOG(4) << "[mluMlNms] iou_threshold: " << iou_threshold; + // get tensor by name (in prototxt) + auto boxes_desc = parser_->getMetaTensor("input").tensor; + auto output_desc = parser_->getMetaTensor("output").tensor; + auto boxes_ptr = parser_->getMetaTensor("input").dev_ptr; + auto output_ptr = parser_->getMetaTensor("output").dev_ptr; + interface_timer_.start(); + + VLOG(4) << "[mluOpMlNms] call mluOpMlNms()"; + MLUOP_CHECK(mluOpMlNms(handle_, boxes_desc, boxes_ptr, + iou_threshold, (uint8_t*)output_ptr)); + interface_timer_.stop(); + VLOG(4) << "[mluOpMlNms] mluOpMlNms end."; +} + +static float iouCompute(std::vector box1, std::vector box2) { + float x1 = std::max(box1[0], box2[0]); + float y1 = std::min(box1[1], box2[1]); + float x2 = std::min(box1[2], box2[2]); + float y2 = std::max(box1[3], box2[3]); + + float area1 = abs(box1[0] - box1[2]) * abs(box1[1] - box1[3]); + float area2 = abs(box2[0] - box2[2]) * abs(box2[1] - box2[3]); + float inter = abs(x1 - x2) * abs(y1 - y2); + + float iou = inter / (area1 + area2 - inter); + + return iou; +} + +void MlNmsExecutor::cpuCompute() { + float iou_threshold = + parser_->getProtoNode()->ml_nms_param().iou_threshold(); + VLOG(4) << "mluMlNms iou_threshold:" << iou_threshold; + auto input_desc = tensor_desc_[0].tensor; + auto boxes_ptr = parser_->getMetaTensor(0).cpu_ptr; + auto output_ptr = parser_->getMetaTensor(1).cpu_ptr; + int input_boxes_num = input_desc->dims[0]; + std::vector> boxes_data_ptr; + for (int i = 0; i < input_boxes_num * 4; i+=4) { + std::vector data_ptr; + for (int j = 0; j < 4; j++) { + data_ptr.push_back(boxes_ptr[j + i]); + } + boxes_data_ptr.push_back(data_ptr); + } + for (int i = 0; i < input_boxes_num ; i++) { + float iou = iouCompute(boxes_data_ptr[0], boxes_data_ptr[i]); + if (iou <= iou_threshold) { + output_ptr[i] = 1; + } else { + output_ptr[i] = 0; + } + } +} + 
+int64_t MlNmsExecutor::getTheoryOps() { + int64_t theory_ops = parser_->input(0)->total_count; + VLOG(4) << "getTheoryOps: " << theory_ops << " ops"; + return theory_ops; +} +} // namespace mluoptest diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/ml_nms.h b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/ml_nms.h new file mode 100644 index 000000000..57ac0193b --- /dev/null +++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/ml_nms.h @@ -0,0 +1,41 @@ +/************************************************************************* + * Copyright (C) [2022] by Cambricon, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *************************************************************************/ +#ifndef TEST_MLU_OP_SRC_ZOO_ML_NMS_ML_NMS_H_ +#define TEST_MLU_OP_SRC_ZOO_ML_NMS_ML_NMS_H_ +#include "executor.h" + +namespace mluoptest { + +class MlNmsExecutor : public Executor { + public: + MlNmsExecutor() {} + ~MlNmsExecutor() {} + + void paramCheck(); + void compute(); + void cpuCompute(); + int64_t getTheoryOps() override; +}; + +} // namespace mluoptest +#endif // TEST_MLU_OP_SRC_ZOO_ML_NMS_ML_NMS_H_ diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/test_case/case_0.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/test_case/case_0.prototxt new file mode 100644 index 000000000..3f416f48b --- /dev/null +++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/ml_nms/test_case/case_0.prototxt @@ -0,0 +1,414 @@ +op_name: "ml_nms" +input { + id: "input" + shape { + dims: 64 + dims: 6 + } + layout: LAYOUT_ARRAY + dtype: DTYPE_FLOAT + value_f:0 + value_f: 100.00 + value_f: 200.00 + value_f: 200.00 + value_f: 100.00 + value_f:0.70 + value_f:0 + value_f: 105.00 + value_f: 200.00 + value_f: 205.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 110.00 + value_f: 200.00 + value_f: 210.00 + value_f: 100.00 + value_f:0.60 + value_f:0 + value_f: 115.00 + value_f: 200.00 + value_f: 215.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 120.00 + value_f: 200.00 + value_f: 220.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 125.00 + value_f: 200.00 + value_f: 225.00 + value_f: 100.00 + value_f:0.80 + value_f:0 + value_f: 130.00 + value_f: 200.00 + value_f: 230.00 + value_f: 100.00 + value_f:0.70 + value_f:0 + value_f: 135.00 + value_f: 200.00 + value_f: 235.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 140.00 + value_f: 200.00 + value_f: 240.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 145.00 + value_f: 200.00 + value_f: 245.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 150.00 + value_f: 200.00 + value_f: 
250.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 155.00 + value_f: 200.00 + value_f: 255.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 160.00 + value_f: 200.00 + value_f: 260.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 165.00 + value_f: 200.00 + value_f: 265.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 170.00 + value_f: 200.00 + value_f: 270.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 175.00 + value_f: 200.00 + value_f: 275.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 180.00 + value_f: 200.00 + value_f: 280.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 185.00 + value_f: 200.00 + value_f: 285.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 190.00 + value_f: 200.00 + value_f: 290.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 195.00 + value_f: 200.00 + value_f: 295.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 205.00 + value_f: 200.00 + value_f: 105.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 210.00 + value_f: 200.00 + value_f: 110.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 215.00 + value_f: 200.00 + value_f: 115.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 220.00 + value_f: 200.00 + value_f: 120.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 225.00 + value_f: 200.00 + value_f: 125.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 230.00 + value_f: 200.00 + value_f: 130.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 235.00 + value_f: 200.00 + value_f: 135.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 240.00 + value_f: 200.00 + value_f: 140.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 245.00 + value_f: 200.00 + value_f: 145.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 250.00 + value_f: 200.00 + value_f: 150.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 255.00 + value_f: 200.00 + value_f: 
155.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 260.00 + value_f: 200.00 + value_f: 160.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 265.00 + value_f: 200.00 + value_f: 165.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 270.00 + value_f: 200.00 + value_f: 170.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 275.00 + value_f: 200.00 + value_f: 175.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 280.00 + value_f: 200.00 + value_f: 180.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 285.00 + value_f: 200.00 + value_f: 185.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 290.00 + value_f: 200.00 + value_f: 190.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 295.00 + value_f: 200.00 + value_f: 195.00 + value_f:0.90 + value_f:0 + value_f: 100.00 + value_f: 300.00 + value_f: 200.00 + value_f: 200.00 + value_f:0.90 + value_f:0 + value_f: 105.00 + value_f: 195.00 + value_f: 205.00 + value_f: 95.00 + value_f:0.90 + value_f:0 + value_f: 110.00 + value_f: 190.00 + value_f: 210.00 + value_f: 90.00 + value_f:0.90 + value_f:0 + value_f: 115.00 + value_f: 185.00 + value_f: 215.00 + value_f: 85.00 + value_f:0.90 + value_f:0 + value_f: 120.00 + value_f: 180.00 + value_f: 220.00 + value_f: 80.00 + value_f:0.90 + value_f:0 + value_f: 125.00 + value_f: 175.00 + value_f: 225.00 + value_f: 75.00 + value_f:0.90 + value_f:0 + value_f: 130.00 + value_f: 170.00 + value_f: 230.00 + value_f: 70.00 + value_f:0.90 + value_f:0 + value_f: 135.00 + value_f: 165.00 + value_f: 235.00 + value_f: 65.00 + value_f:0.90 + value_f:0 + value_f: 140.00 + value_f: 160.00 + value_f: 240.00 + value_f: 60.00 + value_f:0.90 + value_f:0 + value_f: 145.00 + value_f: 155.00 + value_f: 245.00 + value_f: 55.00 + value_f:0.90 + value_f:0 + value_f: 150.00 + value_f: 150.00 + value_f: 250.00 + value_f: 50.00 + value_f:0.90 + value_f:0 + value_f: 155.00 + value_f: 145.00 + value_f: 255.00 + value_f: 45.00 + value_f:0.90 + 
value_f:0 + value_f: 160.00 + value_f: 140.00 + value_f: 260.00 + value_f: 40.00 + value_f:0.90 + value_f:0 + value_f: 165.00 + value_f: 135.00 + value_f: 265.00 + value_f: 35.00 + value_f:0.90 + value_f:0 + value_f: 170.00 + value_f: 130.00 + value_f: 270.00 + value_f: 30.00 + value_f:0.90 + value_f:0 + value_f: 175.00 + value_f: 125.00 + value_f: 275.00 + value_f: 25.00 + value_f:0.90 + value_f:0 + value_f: 180.00 + value_f: 120.00 + value_f: 280.00 + value_f: 20.00 + value_f:0.90 + value_f:0 + value_f: 185.00 + value_f: 115.00 + value_f: 285.00 + value_f: 15.00 + value_f:0.90 + value_f:0 + value_f: 190.00 + value_f: 110.00 + value_f: 290.00 + value_f: 10.00 + value_f:0.90 + value_f:0 + value_f: 195.00 + value_f: 105.00 + value_f: 295.00 + value_f: 5.00 + value_f:0.90 + value_f:0 + value_f: 200.00 + value_f: 100.00 + value_f: 300.00 + value_f: 0.00 + value_f:0.90 + value_f:0 + value_f: 101.00 + value_f: 200.00 + value_f: 201.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 108.00 + value_f: 200.00 + value_f: 208.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 127.00 + value_f: 200.00 + value_f: 227.00 + value_f: 100.00 + value_f:0.90 + value_f:0 + value_f: 137.00 + value_f: 200.00 + value_f: 237.00 + value_f: 100.00 + value_f:0.90 +} +output { + id: "output" + shape: { + dims: 64 + } + layout: LAYOUT_ARRAY + dtype: DTYPE_INT8 +} +ml_nms_param: { + iou_threshold: 0.5 +} +test_param: { + error_func: DIFF1 + error_func: DIFF2 + error_func: DIFF3 + error_threshold: 0.0 + error_threshold: 0.0 + error_threshold: 0.0 + baseline_device: CPU +}