diff --git a/src/SSCMA_Micro_Core.cpp b/src/SSCMA_Micro_Core.cpp index 64a9318..a809840 100644 --- a/src/SSCMA_Micro_Core.cpp +++ b/src/SSCMA_Micro_Core.cpp @@ -440,9 +440,7 @@ SSCMAMicroCore::Expected SSCMAMicroCore::invoke(const Frame& frame, const Invoke } auto results = algorithm->getResults(); if (_config.invoke_config && _config.invoke_config->top_k > 0) { - std::sort(results.begin(), results.end(), [](const ma_keypoint3f_t& a, const ma_keypoint3f_t& b) { return a.box.score > b.box.score; }); - results.resize(std::min(results.size(), static_cast(_config.invoke_config->top_k))); - results.shrink_to_fit(); + results.sort([](const ma_keypoint3f_t& a, const ma_keypoint3f_t& b) { return a.box.score > b.box.score; }); } std::vector keypoints; for (const auto& result : results) { diff --git a/src/components/sscma-micro/sscma/core/cv/ma_cv.h b/src/components/sscma-micro/sscma/core/cv/ma_cv.h index 93ee012..316797a 100644 --- a/src/components/sscma-micro/sscma/core/cv/ma_cv.h +++ b/src/components/sscma-micro/sscma/core/cv/ma_cv.h @@ -13,7 +13,7 @@ ma_err_t convert(const ma_img_t* src, ma_img_t* dst); #if MA_USE_LIB_JPEGENC ma_err_t rgb_to_jpeg(const ma_img_t* src, ma_img_t* dst); -#endif +#endif #ifdef __cplusplus } diff --git a/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.cpp b/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.cpp index 89dbddb..3a2bbb3 100644 --- a/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.cpp +++ b/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.cpp @@ -48,13 +48,20 @@ ma_err_t EngineHalio::run() { return MA_FAILED; } - auto job = _configured_model->run_async(*_bindings, [](const AsyncInferCompletionInfo& info) {}); + auto job = _configured_model->run_async(*_bindings, [&](const AsyncInferCompletionInfo& info) { sta = info.status; }); do { this_thread::yield(); - } while (job->wait(1000ms) != HAILO_SUCCESS); + } while (job->wait(50ms) != HAILO_SUCCESS); - return MA_OK; + switch (sta) { + 
case HAILO_SUCCESS: + return MA_OK; + case HAILO_TIMEOUT: + return MA_ETIMEOUT; + default: + return MA_FAILED; + } } #if MA_USE_FILESYSTEM @@ -121,11 +128,16 @@ ma_err_t EngineHalio::load(const string& model_path) { { - auto create_internal_bindings = [&](const string& name, const InferModel::InferStream& tsr, shared_ptr& tensor) { + auto create_internal_bindings = + [&](const string& name, const InferModel::InferStream& tsr, shared_ptr& tensor, hailort::ConfiguredInferModel::Bindings::InferStream* cis, bool is_input) -> ma_err_t { auto shape = tsr.shape(); auto size = tsr.get_frame_size(); auto format = tsr.format(); + if (!cis) { + return MA_FAILED; + } + void* buffer = aligned_alloc(4096, size); if (!buffer) { return MA_ENOMEM; @@ -145,27 +157,30 @@ ma_err_t EngineHalio::load(const string& model_path) { return MA_ENOMEM; } + cis->set_buffer(MemoryView(buffer, size)); + tensor->data.data = buffer; tensor->size = size; - tensor->shape.size = 3; + tensor->shape.size = 4; + tensor->shape.dims[0] = 1; switch (format.order) { case HAILO_FORMAT_ORDER_NCHW: - tensor->shape.dims[0] = shape.features; - tensor->shape.dims[1] = shape.height; - tensor->shape.dims[2] = shape.width; + tensor->shape.dims[1] = shape.features; + tensor->shape.dims[2] = shape.height; + tensor->shape.dims[3] = shape.width; break; case HAILO_FORMAT_ORDER_NHWC: case HAILO_FORMAT_ORDER_FCR: case HAILO_FORMAT_ORDER_HAILO_NMS: - tensor->shape.dims[0] = shape.height; - tensor->shape.dims[1] = shape.width; - tensor->shape.dims[2] = shape.features; + tensor->shape.dims[1] = shape.height; + tensor->shape.dims[2] = shape.width; + tensor->shape.dims[3] = shape.features; break; case HAILO_FORMAT_ORDER_NHCW: - tensor->shape.dims[0] = shape.height; - tensor->shape.dims[1] = shape.features; - tensor->shape.dims[2] = shape.width; + tensor->shape.dims[1] = shape.height; + tensor->shape.dims[2] = shape.features; + tensor->shape.dims[3] = shape.width; break; default: break; @@ -192,44 +207,76 @@ ma_err_t 
EngineHalio::load(const string& model_path) { break; case HAILO_FORMAT_TYPE_FLOAT32: tensor->type = MA_TENSOR_TYPE_F32; - if (format.order == HAILO_FORMAT_ORDER_HAILO_NMS) { + break; + default: + tensor->type = MA_TENSOR_TYPE_NONE; + break; + } + + if (format.order == HAILO_FORMAT_ORDER_HAILO_NMS) { + switch (format.type) { + case HAILO_FORMAT_TYPE_UINT16: + tensor->type = MA_TENSOR_TYPE_NMS_BBOX_U16; + break; + case HAILO_FORMAT_TYPE_FLOAT32: tensor->type = MA_TENSOR_TYPE_NMS_BBOX_F32; + break; + default: + tensor->type = MA_TENSOR_TYPE_NONE; + break; + } - function f = [this_ptr = this, name](int flag, void* data, size_t size) -> ma_err_t { - if (!data || sizeof(float) != size) { + auto fp = make_shared([this_ptr = this, name, is_input](int flag, void* data, size_t size) -> ma_err_t { + if (!data) { + return MA_EINVAL; + } + auto tsr = is_input ? this_ptr->_model->input(name) : this_ptr->_model->output(name); + if (!tsr) { + return MA_FAILED; + } + switch (flag) { + case 0: // get score threshold + return MA_ENOTSUP; + case 1: // set score threshold + { + if (sizeof(float) != size) { + return MA_EINVAL; + } + float threshold = *static_cast(data); + tsr->set_nms_score_threshold(threshold); + return MA_OK; + } + case 2: // get iou threshold + return MA_ENOTSUP; + case 3: // set iou threshold + { + if (sizeof(float) != size) { return MA_EINVAL; } float threshold = *static_cast(data); - auto tsr = this_ptr->_model->input(name); - if (!tsr) { + tsr->set_nms_iou_threshold(threshold); + return MA_OK; + } + case 4: // get nms shape + { + auto nms_shape = tsr->get_nms_shape(); + if (!nms_shape) { return MA_FAILED; } - switch (flag) { - case 0: // get score threshold - return MA_ENOTSUP; - case 1: // set score threshold - tsr->set_nms_score_threshold(threshold); - return MA_OK; - case 2: // get iou threshold - return MA_ENOTSUP; - case 3: // set iou threshold - tsr->set_nms_iou_threshold(threshold); - return MA_OK; - default: - return MA_ENOTSUP; + auto shape = 
nms_shape.value(); + if (sizeof(hailo_nms_shape_t) != size) { + return MA_EINVAL; } - }; - - _external_handlers[name] = f; - if (!_external_handlers[name]) { - break; + *static_cast(data) = shape; + return MA_OK; } - tensor->external_handler = reinterpret_cast(&_external_handlers[name]); + default: + return MA_ENOTSUP; } - break; - default: - tensor->type = MA_TENSOR_TYPE_NONE; - break; + }); + + _external_handlers[name] = fp; + tensor->external_handler = reinterpret_cast(fp.get()); } _io_buffers[name] = tensor; @@ -243,14 +290,15 @@ ma_err_t EngineHalio::load(const string& model_path) { if (_io_buffers.find(name) != _io_buffers.end()) { continue; } - shared_ptr tensor = nullptr; - - auto ret = create_internal_bindings(name, tsr, tensor); + auto bindings_input = _bindings->input(name); + if (!bindings_input) { + return MA_FAILED; + } + auto ret = create_internal_bindings(name, tsr, tensor, &bindings_input.value(), true); if (ret != MA_OK) { return ret; } - _input_tensors.push_back(tensor); } @@ -260,19 +308,19 @@ ma_err_t EngineHalio::load(const string& model_path) { if (_io_buffers.find(name) != _io_buffers.end()) { continue; } - shared_ptr tensor = nullptr; - - auto ret = create_internal_bindings(name, tsr, tensor); + auto bindings_output = _bindings->output(name); + if (!bindings_output) { + return MA_FAILED; + } + auto ret = create_internal_bindings(name, tsr, tensor, &bindings_output.value(), false); if (ret != MA_OK) { return ret; } - _output_tensors.push_back(tensor); } } - return MA_OK; } @@ -348,7 +396,7 @@ ma_quant_param_t EngineHalio::getOutputQuantParam(int32_t index) { ma_err_t EngineHalio::setInput(int32_t index, const ma_tensor_t& tensor) { - return MA_ENOTSUP; + return MA_ENOTSUP; } } // namespace ma::engine diff --git a/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.h b/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.h index ad7e16e..2175767 100644 --- a/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.h +++ 
b/src/components/sscma-micro/sscma/core/engine/ma_engine_halio.h @@ -23,6 +23,8 @@ using namespace hailort; class EngineHalio final : public Engine { public: + using ExternalHandler = function; + EngineHalio(); ~EngineHalio() override; @@ -56,7 +58,7 @@ class EngineHalio final : public Engine { shared_ptr _bindings; unordered_map> _io_buffers; - unordered_map> _external_handlers; + unordered_map> _external_handlers; vector> _input_tensors; vector> _output_tensors; @@ -66,4 +68,4 @@ class EngineHalio final : public Engine { #endif -#endif +#endif \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/ma_types.h b/src/components/sscma-micro/sscma/core/ma_types.h index b062d6f..984c65e 100644 --- a/src/components/sscma-micro/sscma/core/ma_types.h +++ b/src/components/sscma-micro/sscma/core/ma_types.h @@ -67,7 +67,8 @@ typedef enum { MA_TENSOR_TYPE_STR = 12, MA_TENSOR_TYPE_BOOL = 13, MA_TENSOR_TYPE_BF16 = 14, - MA_TENSOR_TYPE_NMS_BBOX_F32 = 15, + MA_TENSOR_TYPE_NMS_BBOX_U16 = 15, + MA_TENSOR_TYPE_NMS_BBOX_F32 = 16, } ma_tensor_type_t; typedef struct { @@ -212,6 +213,17 @@ struct ma_keypoint4f_t { ma_bbox_t box; std::vector pts; }; + +struct ma_segm2f_t { + ma_bbox_t box; + struct { + uint16_t width; + uint16_t height; + std::vector data; + } mask; +}; + + #endif typedef enum { @@ -239,6 +251,26 @@ typedef enum { typedef enum { MA_MSG_TYPE_RESP = 0, MA_MSG_TYPE_EVT = 1, MA_MSG_TYPE_LOG = 2, MA_MSG_TYPE_REQ = 3, MA_MSG_TYPE_HB = 4 } ma_msg_type_t; +#define MA_INPUT_TYPE_MASK 0xF000 +#define MA_OUTPUT_TYPE_MASK 0x0F00 +#define MA_MODEL_TYPE_MASK 0x00FF + +typedef enum { + MA_INPUT_TYPE_TENSOR = 0x0000, + MA_INPUT_TYPE_IMAGE = 0x1000, + MA_INPUT_TYPE_AUDIO = 0x2000, +} ma_input_type_t; + +typedef enum { + MA_OUTPUT_TYPE_TENSOR = 0x0000, + MA_OUTPUT_TYPE_CLASS = 0x0100, + MA_OUTPUT_TYPE_POINT = 0x0200, + MA_OUTPUT_TYPE_BBOX = 0x0300, + MA_OUTPUT_TYPE_KEYPOINT = 0x0400, + MA_OUTPUT_TYPE_SEGMENTATION = 0x0500, +} ma_output_type_t; + + typedef enum { 
MA_MODEL_TYPE_UNDEFINED = 0u, MA_MODEL_TYPE_FOMO = 1u, @@ -249,7 +281,9 @@ typedef enum { MA_MODEL_TYPE_YOLOV8 = 6u, MA_MODEL_TYPE_NVIDIA_DET = 7u, MA_MODEL_TYPE_YOLO_WORLD = 8u, - MA_MODEL_TYPE_YOLO11 = 9u, + MA_MODEL_TYPE_YOLO11 = 9u, + MA_MODEL_TYPE_YOLO11_POSE = 10u, + MA_MODEL_TYPE_YOLO11_SEG = 11u, } ma_model_type_t; typedef struct { diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_base.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_base.cpp index be60c24..9257d46 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_base.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_base.cpp @@ -4,15 +4,11 @@ namespace ma { constexpr char TAG[] = "ma::model"; -Model::Model(Engine* engine, const char* name, ma_model_type_t type) - - - // Initialize performance metrics to 0 using initializer list -{ - - p_engine_ = engine; - p_name_ = name; - m_type_ = type; +Model::Model(Engine* engine, const char* name, uint16_t type) { + + p_engine_ = engine; + p_name_ = name; + m_type_ = type; p_user_ctx_ = nullptr; p_preprocess_done_ = nullptr; @@ -75,7 +71,14 @@ const char* Model::getName() const { } ma_model_type_t Model::getType() const { - return m_type_; + return static_cast(m_type_ & MA_MODEL_TYPE_MASK); +} + +ma_input_type_t Model::getInputType() const { + return static_cast(m_type_ & MA_INPUT_TYPE_MASK); +} +ma_output_type_t Model::getOutputType() const { + return static_cast(m_type_ & MA_OUTPUT_TYPE_MASK); } void Model::setPreprocessDone(std::function func) { @@ -94,11 +97,4 @@ void Model::setUserCtx(void* ctx) { p_user_ctx_ = ctx; } -class ModelFactory { -public: - static Model* create(Engine* engine); - static ma_err_t remove(const std::string& name); -}; - - -} // namespace ma::model \ No newline at end of file +} // namespace ma \ No newline at end of file diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_base.h b/src/components/sscma-micro/sscma/core/model/ma_model_base.h index 258ca93..a7604c8 100644 
--- a/src/components/sscma-micro/sscma/core/model/ma_model_base.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_base.h @@ -2,8 +2,8 @@ #define _MA_MODEL_BASE_H_ #include -#include #include +#include #include "../engine/ma_engine.h" #include "../ma_common.h" @@ -12,33 +12,36 @@ namespace ma { using namespace ma::engine; class Model { - private: +private: ma_perf_t perf_; - std::function p_preprocess_done_ ; - std::function p_postprocess_done_ ; - std::function p_underlying_run_done_ ; - void* p_user_ctx_; - ma_model_type_t m_type_; + std::function p_preprocess_done_; + std::function p_postprocess_done_; + std::function p_underlying_run_done_; + void* p_user_ctx_; + uint16_t m_type_; - protected: - Engine* p_engine_; - const char* p_name_; +protected: + Engine* p_engine_; + const char* p_name_; virtual ma_err_t preprocess() = 0; virtual ma_err_t postprocess() = 0; - ma_err_t underlyingRun(); + ma_err_t underlyingRun(); - public: - Model(Engine* engine, const char* name, ma_model_type_t type); +public: + Model(Engine* engine, const char* name, uint16_t type); virtual ~Model(); - const ma_perf_t getPerf() const; - const char* getName() const; - ma_model_type_t getType() const; + const ma_perf_t getPerf() const; + const char* getName() const; + ma_model_type_t getType() const; + ma_input_type_t getInputType() const; + ma_output_type_t getOutputType() const; + virtual const void* getInput() = 0; virtual ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) = 0; virtual ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) 
= 0; - void setPreprocessDone (std::function func); - void setPostprocessDone (std::function func); - void setRunDone (std::function func); - void setUserCtx(void* ctx); + void setPreprocessDone(std::function func); + void setPostprocessDone(std::function func); + void setRunDone(std::function func); + void setUserCtx(void* ctx); }; } // namespace ma diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp index 8cbae3f..9b51db1 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_classifier.cpp @@ -6,7 +6,7 @@ namespace ma::model { constexpr char TAG[] = "ma::model::classifier"; -Classifier::Classifier(Engine* p_engine) : Model(p_engine, "IMCLS", MA_MODEL_TYPE_IMCLS) { +Classifier::Classifier(Engine* p_engine) : Model(p_engine, "IMCLS", MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_CLASS | MA_MODEL_TYPE_IMCLS) { input_ = p_engine_->getInput(0); output_ = p_engine_->getOutput(0); threshold_score_ = 0.5f; @@ -33,38 +33,36 @@ Classifier::~Classifier() {} bool Classifier::isValid(Engine* engine) { - const auto& input_shape = engine->getInputShape(0); - auto is_nhwc{input_shape.dims[3] == 3 || input_shape.dims[3] == 1}; - - if (is_nhwc) { - if (input_shape.size != 4 || // N, H, W, C - input_shape.dims[0] != 1 || // N = 1 - input_shape.dims[1] < 16 || // H >= 16 - input_shape.dims[2] < 16 || // W >= 16 - (input_shape.dims[3] != 3 && // C = RGB or Gray - input_shape.dims[3] != 1)) - return false; - } else { + const auto inputs_count = engine->getInputSize(); + const auto outputs_count = engine->getOutputSize(); - if (input_shape.size != 4 || // N, C, H, W - input_shape.dims[0] != 1 || // N = 1 - input_shape.dims[2] < 16 || // H >= 16 - input_shape.dims[3] < 16 || // W >= 16 - (input_shape.dims[1] != 3 && // C = RGB or Gray - input_shape.dims[1] != 1)) - return false; + if (inputs_count != 1 || 
outputs_count != 1) { + return false; } - + const auto& input_shape = engine->getInputShape(0); const auto& output_shape{engine->getOutputShape(0)}; - if (output_shape.size != 2 || // N, C - output_shape.dims[0] != 1 || // N = 1 + int n = input_shape.dims[0], h = input_shape.dims[1], w = input_shape.dims[2], c = input_shape.dims[3]; + bool is_nhwc = c == 3 || c == 1; + + if (!is_nhwc) + std::swap(h, c); + + if (n != 1 || h < 32 || h % 32 != 0 || (c != 3 && c != 1)) + return false; + + + if (output_shape.dims[0] != 1 || // N = 1 output_shape.dims[1] < 2 // C >= 2 ) { return false; } + if (output_shape.size >= 3) { + return false; + } + return true; } @@ -107,6 +105,16 @@ ma_err_t Classifier::postprocess() { if (score > threshold_score_) results_.emplace_front(ma_class_t{score, i}); } + } + if (output_.type == MA_TENSOR_TYPE_F32) { + auto* data = output_.data.f32; + auto pred_l{output_.shape.dims[1]}; + for (decltype(pred_l) i{0}; i < pred_l; ++i) { + auto score{data[i]}; + if (score > threshold_score_) + results_.emplace_front(ma_class_t{score, i}); + } + } else { return MA_ENOTSUP; } @@ -121,7 +129,7 @@ const std::forward_list& Classifier::getResults() { return results_; } -const ma_img_t* Classifier::getInputImg() { +const void* Classifier::getInput() { return &img_; } diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_classifier.h b/src/components/sscma-micro/sscma/core/model/ma_model_classifier.h index 163af9f..703206d 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_classifier.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_classifier.h @@ -28,7 +28,7 @@ class Classifier : public Model { virtual ~Classifier(); static bool isValid(Engine* engine); const std::forward_list& getResults(); - const ma_img_t* getInputImg(); + const void *getInput(); ma_err_t run(const ma_img_t* img); ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) override; ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) 
override; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_detector.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_detector.cpp index 3f40d35..52a0b53 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_detector.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_detector.cpp @@ -9,7 +9,7 @@ namespace ma::model { constexpr char TAG[] = "ma::model::detecor"; Detector::Detector(Engine* p_engine, const char* name, ma_model_type_t type) - : Model(p_engine, name, type), + : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_BBOX | type), input_(p_engine->getInput(0)), // Use direct method call instead of p_engine_-> threshold_nms_(0.45), threshold_score_(0.25) { @@ -58,10 +58,10 @@ const std::forward_list& Detector::getResults() { return results_; } - -const ma_img_t* Detector::getInputImg() { - return &img_; +const void* Detector::getInput() { + return static_cast(&img_); } + ma_err_t Detector::run(const ma_img_t* img) { // MA_ASSERT(img != nullptr); input_img_ = img; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_detector.h b/src/components/sscma-micro/sscma/core/model/ma_model_detector.h index 784216b..04f02c7 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_detector.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_detector.h @@ -26,7 +26,7 @@ class Detector : public Model { Detector(Engine* engine, const char* name, ma_model_type_t type); virtual ~Detector(); const std::forward_list& getResults(); - const ma_img_t* getInputImg(); + const void* getInput() override; ma_err_t run(const ma_img_t* img); ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) override; ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) 
override; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp index 3d02d89..551d9f3 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_factory.cpp @@ -55,6 +55,14 @@ Model* ModelFactory::create(Engine* engine, size_t algorithm_id) { if (Yolo11::isValid(engine)) { return new Yolo11(engine); } + case MA_MODEL_TYPE_YOLO11_POSE: + if (Yolo11Pose::isValid(engine)) { + return new Yolo11Pose(engine); + } + case MA_MODEL_TYPE_YOLO11_SEG: + if (Yolo11Seg::isValid(engine)) { + return new Yolo11Seg(engine); + } } return nullptr; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_factory.h b/src/components/sscma-micro/sscma/core/model/ma_model_factory.h index cf32a3f..c462610 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_factory.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_factory.h @@ -7,17 +7,19 @@ #include "ma_model_classifier.h" #include "ma_model_detector.h" -#include "ma_model_pose_detector.h" #include "ma_model_point_detector.h" +#include "ma_model_pose_detector.h" -#include "ma_model_yolov5.h" -#include "ma_model_yolov8.h" -#include "ma_model_yolov8_pose.h" -#include "ma_model_nvidia_det.h" #include "ma_model_fomo.h" +#include "ma_model_nvidia_det.h" #include "ma_model_pfld.h" -#include "ma_model_yolo_world.h" #include "ma_model_yolo11.h" +#include "ma_model_yolo11_pose.h" +#include "ma_model_yolo11_seg.h" +#include "ma_model_yolo_world.h" +#include "ma_model_yolov5.h" +#include "ma_model_yolov8.h" +#include "ma_model_yolov8_pose.h" namespace ma { diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.cpp index dec3c28..f4c78c4 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.cpp +++ 
b/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.cpp @@ -6,7 +6,7 @@ namespace ma::model { constexpr char TAG[] = "ma::model::point_detecor"; -PointDetector::PointDetector(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, type) { +PointDetector::PointDetector(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_POINT | type) { input_ = p_engine_->getInput(0); threshold_score_ = 0.25; @@ -30,7 +30,9 @@ PointDetector::PointDetector(Engine* p_engine, const char* name, ma_model_type_t PointDetector::~PointDetector() {} -const std::vector& PointDetector::getResults() const { return results_; } +const std::vector& PointDetector::getResults() const { + return results_; +} ma_err_t PointDetector::preprocess() { ma_err_t ret = MA_OK; @@ -56,19 +58,23 @@ ma_err_t PointDetector::run(const ma_img_t* img) { return underlyingRun(); } +const void* PointDetector::getInput() { + return static_cast(&img_); +} + ma_err_t PointDetector::setConfig(ma_model_cfg_opt_t opt, ...) { ma_err_t ret = MA_OK; - va_list args; + va_list args; va_start(args, opt); switch (opt) { - case MA_MODEL_CFG_OPT_THRESHOLD: - threshold_score_ = va_arg(args, double); - ret = MA_OK; - break; - - default: - ret = MA_EINVAL; - break; + case MA_MODEL_CFG_OPT_THRESHOLD: + threshold_score_ = va_arg(args, double); + ret = MA_OK; + break; + + default: + ret = MA_EINVAL; + break; } va_end(args); return ret; @@ -76,18 +82,18 @@ ma_err_t PointDetector::setConfig(ma_model_cfg_opt_t opt, ...) { ma_err_t PointDetector::getConfig(ma_model_cfg_opt_t opt, ...) 
{ ma_err_t ret = MA_OK; - va_list args; - void* p_arg = nullptr; + va_list args; + void* p_arg = nullptr; va_start(args, opt); switch (opt) { - case MA_MODEL_CFG_OPT_THRESHOLD: - p_arg = va_arg(args, void*); - *(static_cast(p_arg)) = threshold_score_; - break; - - default: - ret = MA_EINVAL; - break; + case MA_MODEL_CFG_OPT_THRESHOLD: + p_arg = va_arg(args, void*); + *(static_cast(p_arg)) = threshold_score_; + break; + + default: + ret = MA_EINVAL; + break; } va_end(args); return ret; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.h b/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.h index 9796c61..80761b6 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_point_detector.h @@ -8,9 +8,9 @@ namespace ma::model { class PointDetector : public Model { - protected: - ma_tensor_t input_; - ma_img_t img_; +protected: + ma_tensor_t input_; + ma_img_t img_; const ma_img_t* input_img_; float threshold_score_; @@ -19,10 +19,10 @@ class PointDetector : public Model { std::vector results_; - protected: +protected: ma_err_t preprocess() override; - public: +public: PointDetector(Engine* engine, const char* name, ma_model_type_t type); virtual ~PointDetector(); @@ -30,6 +30,8 @@ class PointDetector : public Model { ma_err_t run(const ma_img_t* img); + const void* getInput() override; + ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) override; ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) 
override; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.cpp index f1cde82..f0efde5 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.cpp @@ -6,7 +6,7 @@ namespace ma::model { constexpr char TAG[] = "ma::model::pose_detecor"; -PoseDetector::PoseDetector(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, type) { +PoseDetector::PoseDetector(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_KEYPOINT | type) { input_ = p_engine_->getInput(0); threshold_nms_ = 0.45; threshold_score_ = 0.25; @@ -31,7 +31,13 @@ PoseDetector::PoseDetector(Engine* p_engine, const char* name, ma_model_type_t t PoseDetector::~PoseDetector() {} -const std::vector& PoseDetector::getResults() const { return results_; } +const std::forward_list& PoseDetector::getResults() const { + return results_; +} + +const void* PoseDetector::getInput() { + return static_cast(&img_); +} ma_err_t PoseDetector::preprocess() { ma_err_t ret = MA_OK; @@ -59,20 +65,20 @@ ma_err_t PoseDetector::run(const ma_img_t* img) { ma_err_t PoseDetector::setConfig(ma_model_cfg_opt_t opt, ...) 
{ ma_err_t ret = MA_OK; - va_list args; + va_list args; va_start(args, opt); switch (opt) { - case MA_MODEL_CFG_OPT_THRESHOLD: - threshold_score_ = va_arg(args, double); - ret = MA_OK; - break; - case MA_MODEL_CFG_OPT_NMS: - threshold_nms_ = va_arg(args, double); - ret = MA_OK; - break; - default: - ret = MA_EINVAL; - break; + case MA_MODEL_CFG_OPT_THRESHOLD: + threshold_score_ = va_arg(args, double); + ret = MA_OK; + break; + case MA_MODEL_CFG_OPT_NMS: + threshold_nms_ = va_arg(args, double); + ret = MA_OK; + break; + default: + ret = MA_EINVAL; + break; } va_end(args); return ret; @@ -80,21 +86,21 @@ ma_err_t PoseDetector::setConfig(ma_model_cfg_opt_t opt, ...) { ma_err_t PoseDetector::getConfig(ma_model_cfg_opt_t opt, ...) { ma_err_t ret = MA_OK; - va_list args; - void* p_arg = nullptr; + va_list args; + void* p_arg = nullptr; va_start(args, opt); switch (opt) { - case MA_MODEL_CFG_OPT_THRESHOLD: - p_arg = va_arg(args, void*); - *(static_cast(p_arg)) = threshold_score_; - break; - case MA_MODEL_CFG_OPT_NMS: - p_arg = va_arg(args, void*); - *(static_cast(p_arg)) = threshold_nms_; - break; - default: - ret = MA_EINVAL; - break; + case MA_MODEL_CFG_OPT_THRESHOLD: + p_arg = va_arg(args, void*); + *(static_cast(p_arg)) = threshold_score_; + break; + case MA_MODEL_CFG_OPT_NMS: + p_arg = va_arg(args, void*); + *(static_cast(p_arg)) = threshold_nms_; + break; + default: + ret = MA_EINVAL; + break; } va_end(args); return ret; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.h b/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.h index fcdb274..38ab2d9 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_pose_detector.h @@ -8,9 +8,9 @@ namespace ma::model { class PoseDetector : public Model { - protected: - ma_tensor_t input_; - ma_img_t img_; +protected: + ma_tensor_t input_; + ma_img_t img_; const ma_img_t* input_img_; float 
threshold_nms_; @@ -18,21 +18,23 @@ class PoseDetector : public Model { bool is_nhwc_; - std::vector results_; + std::forward_list results_; - protected: +protected: ma_err_t preprocess() override; - public: +public: PoseDetector(Engine* engine, const char* name, ma_model_type_t type); virtual ~PoseDetector(); - const std::vector& getResults() const; + const std::forward_list& getResults() const; ma_err_t run(const ma_img_t* img); + const void* getInput() override; + ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) override; - + ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) override; }; diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.cpp new file mode 100644 index 0000000..e6c54ef --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.cpp @@ -0,0 +1,108 @@ +#include "ma_model_segmenter.h" + +#include "../cv/ma_cv.h" + +namespace ma::model { + +constexpr char TAG[] = "ma::model::segmenter"; + +Segmenter::Segmenter(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_SEGMENTATION | type) { + input_ = p_engine_->getInput(0); + threshold_nms_ = 0.45; + threshold_score_ = 0.25; + + is_nhwc_ = input_.shape.dims[3] == 3 || input_.shape.dims[3] == 1; + + if (is_nhwc_) { + img_.height = input_.shape.dims[1]; + img_.width = input_.shape.dims[2]; + img_.size = input_.shape.dims[1] * input_.shape.dims[2] * input_.shape.dims[3]; + img_.format = input_.shape.dims[3] == 3 ? MA_PIXEL_FORMAT_RGB888 : MA_PIXEL_FORMAT_GRAYSCALE; + + } else { + img_.height = input_.shape.dims[2]; + img_.width = input_.shape.dims[3]; + img_.size = input_.shape.dims[3] * input_.shape.dims[2] * input_.shape.dims[1]; + img_.format = input_.shape.dims[1] == 3 ? 
MA_PIXEL_FORMAT_RGB888 : MA_PIXEL_FORMAT_GRAYSCALE; + } + + img_.data = input_.data.u8; +} + +Segmenter::~Segmenter() {} +ma_err_t Segmenter::preprocess() { + ma_err_t ret = MA_OK; + + ret = ma::cv::convert(input_img_, &img_); + if (ret != MA_OK) { + return ret; + } + if (input_.type == MA_TENSOR_TYPE_S8) { + for (int i = 0; i < input_.size; i++) { + input_.data.u8[i] -= 128; + } + } + + return ret; +} + +const void* Segmenter::getInput() { + return static_cast(&img_); +} + +const std::forward_list& Segmenter::getResults() const { + return results_; +} + +ma_err_t Segmenter::run(const ma_img_t* img) { + MA_ASSERT(img != nullptr); + + input_img_ = img; + + return underlyingRun(); +} + +ma_err_t Segmenter::setConfig(ma_model_cfg_opt_t opt, ...) { + ma_err_t ret = MA_OK; + va_list args; + va_start(args, opt); + switch (opt) { + case MA_MODEL_CFG_OPT_THRESHOLD: + threshold_score_ = va_arg(args, double); + ret = MA_OK; + break; + case MA_MODEL_CFG_OPT_NMS: + threshold_nms_ = va_arg(args, double); + ret = MA_OK; + break; + default: + ret = MA_EINVAL; + break; + } + va_end(args); + return ret; +} + +ma_err_t Segmenter::getConfig(ma_model_cfg_opt_t opt, ...) 
{ + ma_err_t ret = MA_OK; + va_list args; + void* p_arg = nullptr; + va_start(args, opt); + switch (opt) { + case MA_MODEL_CFG_OPT_THRESHOLD: + p_arg = va_arg(args, void*); + *(static_cast(p_arg)) = threshold_score_; + break; + case MA_MODEL_CFG_OPT_NMS: + p_arg = va_arg(args, void*); + *(static_cast(p_arg)) = threshold_nms_; + break; + default: + ret = MA_EINVAL; + break; + } + va_end(args); + return ret; +} + +} // namespace ma::model diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.h b/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.h new file mode 100644 index 0000000..258b340 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_segmenter.h @@ -0,0 +1,43 @@ +#ifndef _MA_MODEL_SEGMENTER_H_ +#define _MA_MODEL_SEGMENTER_H_ + +#include + +#include "ma_model_base.h" + +namespace ma::model { + +class Segmenter : public Model { +protected: + ma_tensor_t input_; + ma_img_t img_; + const ma_img_t* input_img_; + + float threshold_nms_; + float threshold_score_; + + bool is_nhwc_; + + std::forward_list results_; + +protected: + ma_err_t preprocess() override; + +public: + Segmenter(Engine* engine, const char* name, ma_model_type_t type); + virtual ~Segmenter(); + + const std::forward_list& getResults() const; + + ma_err_t run(const ma_img_t* img); + + const void* getInput() override; + + ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) override; + + ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) 
override; +}; + +} // namespace ma::model + +#endif // _MA_MODEL_SEGMENTER_H_ diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_pose.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_pose.cpp new file mode 100644 index 0000000..e004525 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_pose.cpp @@ -0,0 +1,197 @@ +#include "ma_model_yolo11_pose.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "../math/ma_math.h" +#include "../utils/ma_anchors.h" +#include "../utils/ma_nms.h" + +namespace ma::model { + +Yolo11Pose::Yolo11Pose(Engine* p_engine_) : PoseDetector(p_engine_, "yolo11_pose", MA_MODEL_TYPE_YOLO11_POSE) { + MA_ASSERT(p_engine_ != nullptr); + + outputs_ = p_engine_->getOutput(0); + + num_class_ = 1; // only one class supported + num_record_ = outputs_.shape.dims[2]; + num_keypoints_ = (outputs_.shape.dims[1] - 5) / 3; + num_element_ = outputs_.shape.dims[1]; +} + +Yolo11Pose::~Yolo11Pose() {} + +bool Yolo11Pose::isValid(Engine* engine) { + + const auto inputs_count = engine->getInputSize(); + const auto outputs_count = engine->getOutputSize(); + + if (inputs_count != 1 || outputs_count != 1) { + return false; + } + const auto& input_shape = engine->getInputShape(0); + const auto& output_shape = engine->getOutputShape(0); + + // Validate input shape + if (input_shape.size != 4) + return false; + + int n = input_shape.dims[0], h = input_shape.dims[1], w = input_shape.dims[2], c = input_shape.dims[3]; + bool is_nhwc = c == 3 || c == 1; + + if (!is_nhwc) + std::swap(h, c); + + + if (n != 1 || h < 32 || h % 32 != 0 || (c != 3 && c != 1)) + return false; + + // Calculate expected output size based on input + int s = w >> 5, m = w >> 4, l = w >> 3; + int ibox_len = (s * s + m * m + l * l); + + // Validate output shape + if (output_shape.size != 3 && output_shape.size != 4) + return false; + + if (output_shape.dims[0] != 1 || output_shape.dims[2] != ibox_len || 
output_shape.dims[1] < 6) + return false; + + if ((output_shape.dims[1] - 5) % 3 != 0) + return false; + + return true; +} + +ma_err_t Yolo11Pose::postprocess() { + results_.clear(); + if (outputs_.type == MA_TENSOR_TYPE_F32) { + return postProcessF32(); + } else if (outputs_.type == MA_TENSOR_TYPE_S8) { + return postProcessI8(); + } + return MA_ENOTSUP; +} + +ma_err_t Yolo11Pose::postProcessI8() { + + /* S8 output: read the raw int8 values and dequantize each one before use, so the score is compared with threshold_score_ in the same domain postProcessF32() uses. */ + + std::forward_list multi_level_bboxes; + + auto* data = outputs_.data.s8; + for (decltype(num_record_) i = 0; i < num_record_; ++i) { + const float score = ma::math::dequantizeValue(data[i + num_record_ * 4], outputs_.quant_param.scale, outputs_.quant_param.zero_point); + + if (score <= threshold_score_) + continue; + + float x = ma::math::dequantizeValue(data[i], outputs_.quant_param.scale, outputs_.quant_param.zero_point); + float y = ma::math::dequantizeValue(data[i + num_record_], outputs_.quant_param.scale, outputs_.quant_param.zero_point); + float w = ma::math::dequantizeValue(data[i + num_record_ * 2], outputs_.quant_param.scale, outputs_.quant_param.zero_point); + float h = ma::math::dequantizeValue(data[i + num_record_ * 3], outputs_.quant_param.scale, outputs_.quant_param.zero_point); + + ma_bbox_ext_t bbox; + bbox.level = 0; + bbox.index = i; + bbox.x = x / img_.width; + bbox.y = y / img_.height; + bbox.w = w / img_.width; + bbox.h = h / img_.height; + bbox.score = score; + bbox.target = 0; + + multi_level_bboxes.emplace_front(std::move(bbox)); + } + + ma::utils::nms(multi_level_bboxes, threshold_nms_, threshold_score_, false, true); + + if (multi_level_bboxes.empty()) { + return MA_OK; + } + + std::vector n_keypoint(num_keypoints_); + + for (auto& bbox : multi_level_bboxes) { + + for (int i = 0; i < num_keypoints_; ++i) { + auto index = bbox.index + num_record_ * (5 + i * 3); + n_keypoint[i].x = ma::math::dequantizeValue(data[index], outputs_.quant_param.scale, 
outputs_.quant_param.zero_point) / img_.width; + n_keypoint[i].y = ma::math::dequantizeValue(data[index + num_record_], outputs_.quant_param.scale, outputs_.quant_param.zero_point) / img_.height; + n_keypoint[i].z = ma::math::dequantizeValue(data[index + num_record_ * 2], outputs_.quant_param.scale, outputs_.quant_param.zero_point); + } + + ma_keypoint3f_t keypoint; + keypoint.box = {.x = bbox.x, .y = bbox.y, .w = bbox.w, .h = bbox.h, .score = bbox.score, .target = bbox.target}; + keypoint.pts = n_keypoint; + + + results_.emplace_front(std::move(keypoint)); + } + + return MA_OK; +} +ma_err_t Yolo11Pose::postProcessF32() { + + std::forward_list multi_level_bboxes; + + auto* data = outputs_.data.f32; + for (decltype(num_record_) i = 0; i < num_record_; ++i) { + auto score = data[i + num_record_ * 4]; + + if (score <= threshold_score_) + continue; + + float x = data[i]; + float y = data[i + num_record_]; + float w = data[i + num_record_ * 2]; + float h = data[i + num_record_ * 3]; + + ma_bbox_ext_t bbox; + bbox.level = 0; + bbox.index = i; + bbox.x = x / img_.width; + bbox.y = y / img_.height; + bbox.w = w / img_.width; + bbox.h = h / img_.height; + bbox.score = score; + bbox.target = 0; + + multi_level_bboxes.emplace_front(std::move(bbox)); + } + + ma::utils::nms(multi_level_bboxes, threshold_nms_, threshold_score_, false, true); + + if (multi_level_bboxes.empty()) { + return MA_OK; + } + + std::vector n_keypoint(num_keypoints_); + + for (auto& bbox : multi_level_bboxes) { + + for (int i = 0; i < num_keypoints_; ++i) { + auto index = bbox.index + num_record_ * (5 + i * 3); + n_keypoint[i].x = data[index] / img_.width; + n_keypoint[i].y = data[index + num_record_] / img_.height; + n_keypoint[i].z = data[index + num_record_ * 2]; + } + + ma_keypoint3f_t keypoint; + keypoint.box = {.x = bbox.x, .y = bbox.y, .w = bbox.w, .h = bbox.h, .score = bbox.score, .target = bbox.target}; + keypoint.pts = n_keypoint; + + + results_.emplace_front(std::move(keypoint)); + } + + return 
MA_OK; +} + +} // namespace ma::model diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_pose.h b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_pose.h new file mode 100644 index 0000000..288b34a --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_pose.h @@ -0,0 +1,36 @@ +#ifndef _MA_MODEL_YOLO11_POSE_H_ +#define _MA_MODEL_YOLO11_POSE_H_ + +#include +#include +#include +#include + +#include "ma_model_pose_detector.h" + +namespace ma::model { + +class Yolo11Pose : public PoseDetector { +private: + ma_tensor_t outputs_; + int32_t num_record_; + int32_t num_element_; + int32_t num_class_; + int32_t num_keypoints_; + +protected: + ma_err_t postprocess() override; + + ma_err_t postProcessI8(); + ma_err_t postProcessF32(); + +public: + Yolo11Pose(Engine* engine); + ~Yolo11Pose(); + + static bool isValid(Engine* engine); +}; + +} // namespace ma::model + +#endif // _MA_MODEL_YOLO_H diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.cpp new file mode 100644 index 0000000..a9ccfe8 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.cpp @@ -0,0 +1,174 @@ +#include "ma_model_yolo11_seg.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "../math/ma_math.h" +#include "../utils/ma_nms.h" + +constexpr char TAG[] = "ma::model::yolo11_seg"; + +namespace ma::model { + +Yolo11Seg::Yolo11Seg(Engine* p_engine_) : Segmenter(p_engine_, "yolo11_seg", MA_MODEL_TYPE_YOLO11_SEG) { + MA_ASSERT(p_engine_ != nullptr); + + bboxes_ = p_engine_->getOutput(0); + protos_ = p_engine_->getOutput(1); + + num_class_ = bboxes_.shape.dims[1] - 36; // 4 + 1 + 32 + num_record_ = bboxes_.shape.dims[2]; +} + +Yolo11Seg::~Yolo11Seg() {} + +bool Yolo11Seg::isValid(Engine* engine) { + + const auto inputs_count = engine->getInputSize(); + const auto outputs_count = engine->getOutputSize(); 
+ + if (inputs_count != 1 || outputs_count != 2) { + return false; + } + const auto& input_shape = engine->getInputShape(0); + const auto& output_shape = engine->getOutputShape(0); + const auto& mask_shape = engine->getOutputShape(1); + + // Validate input shape + if (input_shape.size != 4) { + return false; + } + + int n = input_shape.dims[0], h = input_shape.dims[1], w = input_shape.dims[2], c = input_shape.dims[3]; + bool is_nhwc = c == 3 || c == 1; + + if (!is_nhwc) + std::swap(h, c); + + + if (n != 1 || h < 32 || h % 32 != 0 || (c != 3 && c != 1)) { + return false; + } + + // Calculate expected output size based on input + int s = w >> 5, m = w >> 4, l = w >> 3; + int ibox_len = (s * s + m * m + l * l); + + // Validate output shape + if ((output_shape.size != 3 && output_shape.size != 4) || mask_shape.size != 4) { + return false; + } + + if (output_shape.dims[0] != 1 || output_shape.dims[2] != ibox_len || output_shape.dims[1] < 37) { + return false; + } + + if (mask_shape.dims[0] != 1 || mask_shape.dims[1] != 32 || mask_shape.dims[2] != w >> 2 || mask_shape.dims[3] != w >> 2) { + return false; + } + + return true; +} + +ma_err_t Yolo11Seg::postprocess() { + results_.clear(); + if (bboxes_.type == MA_TENSOR_TYPE_F32) { + return postProcessF32(); + } + return MA_ENOTSUP; +} + +ma_err_t Yolo11Seg::postProcessF32() { + + std::forward_list multi_level_bboxes; + auto* data = bboxes_.data.f32; + for (decltype(num_record_) i = 0; i < num_record_; ++i) { + + float max = threshold_score_; + int target = -1; + + for (int c = 0; c < num_class_; c++) { + float score = data[i + num_record_ * (4 + c)]; + if (score < max) [[likely]] { + continue; + } + max = score; + target = c; + } + + if (target < 0) + continue; + + float x = data[i]; + float y = data[i + num_record_]; + float w = data[i + num_record_ * 2]; + float h = data[i + num_record_ * 3]; + + + ma_bbox_ext_t bbox; + bbox.level = 0; + bbox.index = i; + bbox.x = x / img_.width; + bbox.y = y / img_.height; + bbox.w = w 
/ img_.width; + bbox.h = h / img_.height; + bbox.score = max; + bbox.target = target; + + multi_level_bboxes.emplace_front(std::move(bbox)); + } + + ma::utils::nms(multi_level_bboxes, threshold_nms_, threshold_score_, false, true); + + if (multi_level_bboxes.empty()) + return MA_OK; + + // fetch mask + for (auto& bbox : multi_level_bboxes) { + ma_segm2f_t seg; + seg.box = {.x = bbox.x, .y = bbox.y, .w = bbox.w, .h = bbox.h, .score = bbox.score, .target = bbox.target}; + seg.mask.width = protos_.shape.dims[2]; + seg.mask.height = protos_.shape.dims[3]; + seg.mask.data.resize(protos_.shape.dims[2] * protos_.shape.dims[3] / 8, 0); // bitwise + + const int mask_size = protos_.shape.dims[2] * protos_.shape.dims[3]; + + std::vector masks(mask_size, 0.0f); + + // TODO: parallel for + for (int j = 0; j < protos_.shape.dims[1]; ++j) { + float mask_in = bboxes_.data.f32[bbox.index + num_record_ * (4 + num_class_ + j)]; + for (int i = 0; i < mask_size; ++i) { + masks[i] += mask_in * protos_.data.f32[j * mask_size + i]; + } + } + + int x1 = (bbox.x - bbox.w / 2) * protos_.shape.dims[2]; + int y1 = (bbox.y - bbox.h / 2) * protos_.shape.dims[3]; + int x2 = (bbox.x + bbox.w / 2) * protos_.shape.dims[2]; + int y2 = (bbox.y + bbox.h / 2) * protos_.shape.dims[3]; + + for (int i = 0; i < protos_.shape.dims[2]; i++) { + for (int j = 0; j < protos_.shape.dims[3]; j++) { + if (i < y1 || i >= y2 || j < x1 || j >= x2) [[likely]] { + continue; + } + if (masks[i * protos_.shape.dims[3] + j] > 0.5) { + seg.mask.data[i * protos_.shape.dims[3] / 8 + j / 8] |= (1 << (j % 8)); + } + } + } + + results_.emplace_front(std::move(seg)); + } + + + return MA_OK; +} + +} // namespace ma::model diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.h b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.h new file mode 100644 index 0000000..f7025e3 --- /dev/null +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolo11_seg.h @@ -0,0 +1,34 @@ +#ifndef 
_MA_MODEL_YOLO11_SEG_H_ +#define _MA_MODEL_YOLO11_SEG_H_ + +#include +#include +#include +#include + +#include "ma_model_segmenter.h" + +namespace ma::model { + +class Yolo11Seg : public Segmenter { +private: + ma_tensor_t bboxes_; + ma_tensor_t protos_; + int32_t num_record_; + int32_t num_class_; + +protected: + ma_err_t postprocess() override; + + ma_err_t postProcessF32(); + +public: + Yolo11Seg(Engine* engine); + ~Yolo11Seg(); + + static bool isValid(Engine* engine); +}; + +} // namespace ma::model + +#endif // _MA_MODEL_YOLO_H diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.cpp index 7472726..07619f2 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.cpp @@ -1,6 +1,7 @@ #include #include -#include +#include +#include #include "../utils/ma_nms.h" @@ -22,8 +23,7 @@ YoloV5::YoloV5(Engine* p_engine_) : Detector(p_engine_, "yolov5", MA_MODEL_TYPE_ YoloV5::~YoloV5() {} -bool YoloV5::isValid(Engine* engine) { - +static bool generalValid(Engine* engine) { const auto inputs_count = engine->getInputSize(); const auto outputs_count = engine->getOutputSize(); @@ -62,9 +62,54 @@ bool YoloV5::isValid(Engine* engine) { return true; } -ma_err_t YoloV5::postprocess() { - results_.clear(); +static bool nmsValid(Engine* engine) { +#if MA_USE_ENGINE_HALIO + if (engine->getInputSize() != 1 || engine->getOutputSize() != 1) + return false; + + auto input = engine->getInput(0); + auto output = engine->getOutput(0); + + if (input.shape.size != 4 || output.shape.size != 4) + return false; + + auto n = input.shape.dims[0]; + auto h = input.shape.dims[1]; + auto w = input.shape.dims[2]; + auto c = input.shape.dims[3]; + + if (n != 1 || h < 32 || h % 32 != 0 || (c != 3 && c != 1)) + return false; + + auto b = output.shape.dims[0]; + auto cs = output.shape.dims[1]; + auto mb = output.shape.dims[2]; + auto f = 
output.shape.dims[3]; + + if (b != 1 || cs <= 0 || mb <= 1 || f != 0) + return false; + return true; +#else + return false; +#endif +} + +bool YoloV5::isValid(Engine* engine) { + if (!engine || engine->getOutputSize() != 1) + return false; + auto output = engine->getOutput(0); + + switch (output.type) { + case MA_TENSOR_TYPE_NMS_BBOX_U16: + case MA_TENSOR_TYPE_NMS_BBOX_F32: + return nmsValid(engine); + default: + return generalValid(engine); + } +} + +ma_err_t YoloV5::generalPostProcess() { if (output_.type == MA_TENSOR_TYPE_S8) { auto* data = output_.data.s8; auto scale = output_.quant_param.scale; @@ -161,4 +206,155 @@ ma_err_t YoloV5::postprocess() { return MA_OK; } + +ma_err_t YoloV5::nmsPostProcess() { +#if MA_USE_ENGINE_HALIO + + auto& output = output_; + + if (output.shape.size < 4) { + return MA_FAILED; + } + + size_t w = output.shape.dims[1]; + size_t h = output.shape.dims[2]; + size_t c = output.shape.dims[3]; + + hailo_nms_shape_t nms_shape; + if (output.external_handler) { + auto rc = (*reinterpret_cast(output.external_handler))(4, &nms_shape, sizeof(hailo_nms_shape_t)); + if (rc == MA_OK) { + w = nms_shape.number_of_classes; + h = nms_shape.max_bboxes_per_class; + c = nms_shape.max_accumulated_mask_size; + } + } + + switch (output.type) { + case MA_TENSOR_TYPE_NMS_BBOX_U16: { + using T = uint16_t; + using P = hailo_bbox_t; + + const auto zp = output.quant_param.zero_point; + const auto scale = output.quant_param.scale; + + auto ptr = output.data.u8; + for (size_t i = 0; i < w; ++i) { + auto bc = *reinterpret_cast(ptr); + ptr += sizeof(T); + + if (bc <= 0) { + continue; + } else if (bc > h) { + break; + } + + for (size_t j = 0; j < static_cast(bc); ++j) { + auto bbox = *reinterpret_cast(ptr); + ptr += sizeof(P); + + ma_bbox_t res; + + auto x_min = static_cast(bbox.x_min - zp) * scale; + auto y_min = static_cast(bbox.y_min - zp) * scale; + auto x_max = static_cast(bbox.x_max - zp) * scale; + auto y_max = static_cast(bbox.y_max - zp) * scale; + res.w = 
x_max - x_min; + res.h = y_max - y_min; + res.x = x_min + res.w * 0.5; + res.y = y_min + res.h * 0.5; + res.score = static_cast(bbox.score - zp) * scale; + + res.target = static_cast(i); + + res.x = MA_CLIP(res.x, 0, 1.0f); + res.y = MA_CLIP(res.y, 0, 1.0f); + res.w = MA_CLIP(res.w, 0, 1.0f); + res.h = MA_CLIP(res.h, 0, 1.0f); + + results_.emplace_front(res); + } + } + } break; + + case MA_TENSOR_TYPE_NMS_BBOX_F32: { + using T = float32_t; + using P = hailo_bbox_float32_t; + + auto ptr = output.data.u8; + for (size_t i = 0; i < w; ++i) { + auto bc = *reinterpret_cast(ptr); + ptr += sizeof(T); + + if (bc <= 0) { + continue; + } else if (bc > h) { + break; + } + + for (size_t j = 0; j < static_cast(bc); ++j) { + auto bbox = *reinterpret_cast(ptr); + ptr += sizeof(P); + + ma_bbox_t res; + + res.w = bbox.x_max - bbox.x_min; + res.h = bbox.y_max - bbox.y_min; + res.x = bbox.x_min + res.w * 0.5; + res.y = bbox.y_min + res.h * 0.5; + res.score = bbox.score; + + res.target = static_cast(i); + + res.x = MA_CLIP(res.x, 0, 1.0f); + res.y = MA_CLIP(res.y, 0, 1.0f); + res.w = MA_CLIP(res.w, 0, 1.0f); + res.h = MA_CLIP(res.h, 0, 1.0f); + + results_.emplace_front(res); + } + } + } break; + + default: + return MA_ENOTSUP; + } + + return MA_OK; +#else + return MA_FAILED; +#endif +} + +ma_err_t YoloV5::postprocess() { + results_.clear(); + + switch (output_.type) { + case MA_TENSOR_TYPE_NMS_BBOX_U16: + case MA_TENSOR_TYPE_NMS_BBOX_F32: { +#if MA_USE_ENGINE_HALIO + /* Pass the score threshold (request 1) and the NMS threshold (request 3) through the output tensor's external handler; a member is overwritten only when the handler returns MA_OK. TODO: can be optimized by not calling this handler for each frame. */ + if (output_.external_handler) { + auto ph = reinterpret_cast(output_.external_handler); + float thr = threshold_score_; + auto rc = (*ph)(1, &thr, sizeof(float)); + if (rc == MA_OK) { + threshold_score_ = thr; + } + thr = threshold_nms_; + rc = (*ph)(3, &thr, sizeof(float)); + if (rc == MA_OK) { + threshold_nms_ = thr; + } + } +#endif + return nmsPostProcess(); + } + + default: + return generalPostProcess(); + } + + return MA_ENOTSUP; +} } // 
namespace ma::model diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.h b/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.h index 002855b..6941d79 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.h +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov5.h @@ -24,6 +24,8 @@ class YoloV5 : public Detector { protected: ma_err_t postprocess() override; + ma_err_t generalPostProcess(); + ma_err_t nmsPostProcess(); public: YoloV5(Engine* engine); diff --git a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp index 0b8165a..5fbbc38 100644 --- a/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp +++ b/src/components/sscma-micro/sscma/core/model/ma_model_yolov8_pose.cpp @@ -41,26 +41,26 @@ YoloV8Pose::YoloV8Pose(Engine* p_engine_) : PoseDetector(p_engine_, "yolo_world" const auto dim_2 = outputs_[i].shape.dims[2]; switch (dim_2) { - case 1: - for (size_t j = 0; j < anchor_variants_; ++j) { - if (dim_1 == static_cast(anchor_strides_[j].size)) { - output_scores_ids_[j] = i; - break; + case 1: + for (size_t j = 0; j < anchor_variants_; ++j) { + if (dim_1 == static_cast(anchor_strides_[j].size)) { + output_scores_ids_[j] = i; + break; + } } - } - break; - case 64: - for (size_t j = 0; j < anchor_variants_; ++j) { - if (dim_1 == static_cast(anchor_strides_[j].size)) { - output_bboxes_ids_[j] = i; - break; + break; + case 64: + for (size_t j = 0; j < anchor_variants_; ++j) { + if (dim_1 == static_cast(anchor_strides_[j].size)) { + output_bboxes_ids_[j] = i; + break; + } + } + break; + default: + if (dim_2 % 3 == 0) { + output_keypoints_id_ = i; } - } - break; - default: - if (dim_2 % 3 == 0) { - output_keypoints_id_ = i; - } } } } @@ -103,10 +103,7 @@ bool YoloV8Pose::isValid(Engine* engine) { auto anchor_strides_1 = ma::utils::generateAnchorStrides(std::min(h, w)); auto anchor_strides_2 = 
anchor_strides_1; - auto sum = - std::accumulate(anchor_strides_1.begin(), anchor_strides_1.end(), 0u, [](auto sum, const auto& anchor_stride) { - return sum + anchor_stride.size; - }); + auto sum = std::accumulate(anchor_strides_1.begin(), anchor_strides_1.end(), 0u, [](auto sum, const auto& anchor_stride) { return sum + anchor_stride.size; }); // Note: would fail if the model has 64 classes for (size_t i = 0; i < num_outputs_; ++i) { @@ -116,39 +113,35 @@ bool YoloV8Pose::isValid(Engine* engine) { } switch (output_shape.dims[2]) { - case 1: { - auto it = std::find_if(anchor_strides_1.begin(), - anchor_strides_1.end(), - [&output_shape](const ma_anchor_stride_t& anchor_stride) { - return static_cast(anchor_stride.size) == output_shape.dims[1]; - }); - if (it == anchor_strides_1.end()) { - return false; - } else { - anchor_strides_1.erase(it); - } - } break; - - case 64: { - auto it = std::find_if(anchor_strides_2.begin(), - anchor_strides_2.end(), - [&output_shape](const ma_anchor_stride_t& anchor_stride) { - return static_cast(anchor_stride.size) == output_shape.dims[1]; - }); - if (it == anchor_strides_2.end()) { - return false; - } else { - anchor_strides_2.erase(it); - } - } break; + case 1: { + auto it = std::find_if(anchor_strides_1.begin(), anchor_strides_1.end(), [&output_shape](const ma_anchor_stride_t& anchor_stride) { + return static_cast(anchor_stride.size) == output_shape.dims[1]; + }); + if (it == anchor_strides_1.end()) { + return false; + } else { + anchor_strides_1.erase(it); + } + } break; + + case 64: { + auto it = std::find_if(anchor_strides_2.begin(), anchor_strides_2.end(), [&output_shape](const ma_anchor_stride_t& anchor_stride) { + return static_cast(anchor_stride.size) == output_shape.dims[1]; + }); + if (it == anchor_strides_2.end()) { + return false; + } else { + anchor_strides_2.erase(it); + } + } break; - default: - if (output_shape.dims[2] % 3 != 0) { - return false; - } - if (output_shape.dims[1] != static_cast(sum)) { - return 
false; - } + default: + if (output_shape.dims[2] % 3 != 0) { + return false; + } + if (output_shape.dims[1] != static_cast(sum)) { + return false; + } } } @@ -159,36 +152,38 @@ bool YoloV8Pose::isValid(Engine* engine) { return true; } -const char* YoloV8Pose::getTag() { return "ma::model::yolo_world"; } +const char* YoloV8Pose::getTag() { + return "ma::model::yolo_world"; +} ma_err_t YoloV8Pose::postprocess() { uint8_t check = 0; for (size_t i = 0; i < num_outputs_; ++i) { switch (outputs_[i].type) { - case MA_TENSOR_TYPE_S8: - break; + case MA_TENSOR_TYPE_S8: + break; - case MA_TENSOR_TYPE_F32: - check |= 1 << i; - break; + case MA_TENSOR_TYPE_F32: + check |= 1 << i; + break; - default: - return MA_ENOTSUP; + default: + return MA_ENOTSUP; } } switch (check) { - case 0: - return postProcessI8(); + case 0: + return postProcessI8(); #ifdef MA_MODEL_POSTPROCESS_FP32_VARIANT - case 0b1111111: - return postProcessF32(); + case 0b1111111: + return postProcessF32(); #endif - default: - return MA_ENOTSUP; + default: + return MA_ENOTSUP; } return MA_ENOTSUP; @@ -213,27 +208,26 @@ ma_err_t YoloV8Pose::postProcessI8() { const auto anchor_matrix_size = anchor_matrix_.size(); for (size_t i = 0; i < anchor_matrix_size; ++i) { - const auto output_scores_id = output_scores_ids_[i]; - const auto* output_scores = output_data[output_scores_id]; + const auto output_scores_id = output_scores_ids_[i]; + const auto* output_scores = output_data[output_scores_id]; const size_t output_scores_shape_dims_2 = outputs_[output_scores_id].shape.dims[2]; - const auto output_scores_quant_parm = outputs_[output_scores_id].quant_param; + const auto output_scores_quant_parm = outputs_[output_scores_id].quant_param; - const auto output_bboxes_id = output_bboxes_ids_[i]; - const auto* output_bboxes = output_data[output_bboxes_id]; + const auto output_bboxes_id = output_bboxes_ids_[i]; + const auto* output_bboxes = output_data[output_bboxes_id]; const size_t output_bboxes_shape_dims_2 = 
outputs_[output_bboxes_id].shape.dims[2]; - const auto output_bboxes_quant_parm = outputs_[output_bboxes_id].quant_param; + const auto output_bboxes_quant_parm = outputs_[output_bboxes_id].quant_param; - const auto& anchor_array = anchor_matrix_[i]; - const auto anchor_array_size = anchor_array.size(); + const auto& anchor_array = anchor_matrix_[i]; + const auto anchor_array_size = anchor_array.size(); - const int32_t score_threshold_quan_non_sigmoid = ma::math::quantizeValueFloor( - score_threshold_non_sigmoid, output_scores_quant_parm.zero_point, output_scores_quant_parm.scale); + const int32_t score_threshold_quan_non_sigmoid = ma::math::quantizeValueFloor(score_threshold_non_sigmoid, output_scores_quant_parm.zero_point, output_scores_quant_parm.scale); for (size_t j = 0; j < anchor_array_size; ++j) { const auto j_mul_output_scores_shape_dims_2 = j * output_scores_shape_dims_2; - auto max_score_raw = score_threshold_quan_non_sigmoid; - int32_t target = -1; + auto max_score_raw = score_threshold_quan_non_sigmoid; + int32_t target = -1; for (size_t k = 0; k < output_scores_shape_dims_2; ++k) { int8_t score = output_scores[j_mul_output_scores_shape_dims_2 + k]; @@ -245,10 +239,10 @@ ma_err_t YoloV8Pose::postProcessI8() { target = k; } - if (target < 0) continue; + if (target < 0) + continue; - const float real_score = ma::math::sigmoid(ma::math::dequantizeValue( - max_score_raw, output_scores_quant_parm.zero_point, output_scores_quant_parm.scale)); + const float real_score = ma::math::sigmoid(ma::math::dequantizeValue(max_score_raw, output_scores_quant_parm.zero_point, output_scores_quant_parm.scale)); // DFL float dist[4]; @@ -258,9 +252,7 @@ ma_err_t YoloV8Pose::postProcessI8() { for (size_t m = 0; m < 4; ++m) { const size_t offset = pre + m * 16; for (size_t n = 0; n < 16; ++n) { - matrix[n] = ma::math::dequantizeValue(static_cast(output_bboxes[offset + n]), - output_bboxes_quant_parm.zero_point, - output_bboxes_quant_parm.scale); + matrix[n] = 
ma::math::dequantizeValue(static_cast(output_bboxes[offset + n]), output_bboxes_quant_parm.zero_point, output_bboxes_quant_parm.scale); } ma::math::softmax(matrix, 16); @@ -296,15 +288,13 @@ ma_err_t YoloV8Pose::postProcessI8() { ma::utils::nms(multi_level_bboxes, threshold_nms_, threshold_score_, false, true); if (multi_level_bboxes.empty()) { - results_.shrink_to_fit(); - return MA_OK; } - const auto* output_keypoints = output_data[output_keypoints_id_]; - const auto output_keypoints_dims_2 = outputs_[output_keypoints_id_].shape.dims[2]; - const auto output_keypoints_quant_parm = outputs_[output_keypoints_id_].quant_param; - const size_t keypoint_nums = output_keypoints_dims_2 / 3; + const auto* output_keypoints = output_data[output_keypoints_id_]; + const auto output_keypoints_dims_2 = outputs_[output_keypoints_id_].shape.dims[2]; + const auto output_keypoints_quant_parm = outputs_[output_keypoints_id_].quant_param; + const size_t keypoint_nums = output_keypoints_dims_2 / 3; std::vector n_keypoint(keypoint_nums); @@ -314,18 +304,11 @@ ma_err_t YoloV8Pose::postProcessI8() { for (size_t i = 0; i < keypoint_nums; ++i) { const auto offset = pre + i * 3; - const float x = ma::math::dequantizeValue(static_cast(output_keypoints[offset]), - output_keypoints_quant_parm.zero_point, - output_keypoints_quant_parm.scale); + const float x = ma::math::dequantizeValue(static_cast(output_keypoints[offset]), output_keypoints_quant_parm.zero_point, output_keypoints_quant_parm.scale); - const float y = ma::math::dequantizeValue(static_cast(output_keypoints[offset + 1]), - output_keypoints_quant_parm.zero_point, - output_keypoints_quant_parm.scale); + const float y = ma::math::dequantizeValue(static_cast(output_keypoints[offset + 1]), output_keypoints_quant_parm.zero_point, output_keypoints_quant_parm.scale); - const float z = - ma::math::sigmoid(ma::math::dequantizeValue(static_cast(output_keypoints[offset + 2]), - output_keypoints_quant_parm.zero_point, - 
output_keypoints_quant_parm.scale)); + const float z = ma::math::sigmoid(ma::math::dequantizeValue(static_cast(output_keypoints[offset + 2]), output_keypoints_quant_parm.zero_point, output_keypoints_quant_parm.scale)); n_keypoint[i] = {x, y, z}; } @@ -334,11 +317,9 @@ ma_err_t YoloV8Pose::postProcessI8() { keypoint.box = {.x = bbox.x, .y = bbox.y, .w = bbox.w, .h = bbox.h, .score = bbox.score, .target = bbox.target}; keypoint.pts = n_keypoint; - results_.push_back(std::move(keypoint)); + results_.emplace_front(std::move(keypoint)); } - results_.shrink_to_fit(); - return MA_OK; } @@ -362,22 +343,22 @@ ma_err_t YoloV8Pose::postProcessF32() { const auto anchor_matrix_size = anchor_matrix_.size(); for (size_t i = 0; i < anchor_matrix_size; ++i) { - const auto output_scores_id = output_scores_ids_[i]; - const auto* output_scores = output_data[output_scores_id]; + const auto output_scores_id = output_scores_ids_[i]; + const auto* output_scores = output_data[output_scores_id]; const size_t output_scores_shape_dims_2 = outputs_[output_scores_id].shape.dims[2]; - const auto output_bboxes_id = output_bboxes_ids_[i]; - const auto* output_bboxes = output_data[output_bboxes_id]; + const auto output_bboxes_id = output_bboxes_ids_[i]; + const auto* output_bboxes = output_data[output_bboxes_id]; const size_t output_bboxes_shape_dims_2 = outputs_[output_bboxes_id].shape.dims[2]; - const auto& anchor_array = anchor_matrix_[i]; - const auto anchor_array_size = anchor_array.size(); + const auto& anchor_array = anchor_matrix_[i]; + const auto anchor_array_size = anchor_array.size(); for (size_t j = 0; j < anchor_array_size; ++j) { const auto j_mul_output_scores_shape_dims_2 = j * output_scores_shape_dims_2; - auto max_score_raw = score_threshold_non_sigmoid; - int32_t target = -1; + auto max_score_raw = score_threshold_non_sigmoid; + int32_t target = -1; for (size_t k = 0; k < output_scores_shape_dims_2; ++k) { int8_t score = output_scores[j_mul_output_scores_shape_dims_2 + k]; @@ 
-389,7 +370,8 @@ ma_err_t YoloV8Pose::postProcessF32() { target = k; } - if (target < 0) continue; + if (target < 0) + continue; const float real_score = ma::math::sigmoid(max_score_raw); @@ -437,15 +419,13 @@ ma_err_t YoloV8Pose::postProcessF32() { ma::utils::nms(multi_level_bboxes, threshold_nms_, threshold_score_, false, true); if (multi_level_bboxes.empty()) { - results_.shrink_to_fit(); - return MA_OK; } - const auto* output_keypoints = output_data[output_keypoints_id_]; - const auto output_keypoints_dims_2 = outputs_[output_keypoints_id_].shape.dims[2]; - const auto output_keypoints_quant_parm = outputs_[output_keypoints_id_].quant_param; - const size_t keypoint_nums = output_keypoints_dims_2 / 3; + const auto* output_keypoints = output_data[output_keypoints_id_]; + const auto output_keypoints_dims_2 = outputs_[output_keypoints_id_].shape.dims[2]; + const auto output_keypoints_quant_parm = outputs_[output_keypoints_id_].quant_param; + const size_t keypoint_nums = output_keypoints_dims_2 / 3; std::vector n_keypoint(keypoint_nums); @@ -468,11 +448,9 @@ ma_err_t YoloV8Pose::postProcessF32() { keypoint.box = {.x = bbox.x, .y = bbox.y, .w = bbox.w, .h = bbox.h, .score = bbox.score, .target = bbox.target}; keypoint.pts = n_keypoint; - results_.push_back(std::move(keypoint)); + results_.emplace_front(std::move(keypoint)); /* same insertion call as postProcessI8() */ } - results_.shrink_to_fit(); - return MA_OK; } #endif