Skip to content

Commit

Permalink
add YOLOv8/YOLO11 OBB support
Browse files Browse the repository at this point in the history
  • Loading branch information
Neutree committed Dec 20, 2024
1 parent dc0e11b commit ec9a6c4
Show file tree
Hide file tree
Showing 3 changed files with 269 additions and 61 deletions.
80 changes: 72 additions & 8 deletions components/nn/include/maix_nn_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ namespace maix::nn
* @maixpy maix.nn.Object.__init__
* @maixcdk maix.nn.Object.Object
*/
Object(int x = 0, int y = 0, int w = 0, int h = 0, int class_id = 0, float score = 0, std::vector<int> points = std::vector<int>())
: x(x), y(y), w(w), h(h), class_id(class_id), score(score), points(points), seg_mask(NULL), temp(NULL)
Object(int x = 0, int y = 0, int w = 0, int h = 0, int class_id = 0, float score = 0, std::vector<int> points = std::vector<int>(), float angle = -9999)
: x(x), y(y), w(w), h(h), class_id(class_id), score(score), points(points), angle(angle), seg_mask(NULL), temp(NULL)
{
}

Expand All @@ -48,7 +48,56 @@ namespace maix::nn
*/
std::string to_str()
{
return "x: " + std::to_string(x) + ", y: " + std::to_string(y) + ", w: " + std::to_string(w) + ", h: " + std::to_string(h) + ", class_id: " + std::to_string(class_id) + ", score: " + std::to_string(score);
if(angle != -9999)
return "x: " + std::to_string(x) + ", y: " + std::to_string(y) + ", w: " + std::to_string(w) + ", h: " + std::to_string(h) + ", class_id: " + std::to_string(class_id) + ", score: " + std::to_string(score) + ", angle: " + std::to_string(angle);
else
return "x: " + std::to_string(x) + ", y: " + std::to_string(y) + ", w: " + std::to_string(w) + ", h: " + std::to_string(h) + ", class_id: " + std::to_string(class_id) + ", score: " + std::to_string(score);
}

/**
 * Get OBB(oriented bounding box) points, auto calculated according to x,y,w,h,angle.
 * The axis-aligned box (x, y, w, h) is rotated around its center by angle * PI radians.
 * If angle is -9999 (not set), the corners of the axis-aligned box are returned unchanged.
 * @return int list [x1, y1, x2, y2, x3, y3, x4, y4]; before rotation the corner order is
 *         top-left, top-right, bottom-right, bottom-left.
 * @maixpy maix.nn.Object.get_obb_points
 */
std::vector<int> get_obb_points()
{
    // Corners of the un-rotated box: top-left, top-right, bottom-right, bottom-left.
    const int corners[4][2] = {
        {x, y},
        {x + w, y},
        {x + w, y + h},
        {x, y + h}};

    std::vector<int> obb_points;
    obb_points.reserve(8);

    if (angle == -9999)
    {
        // Angle not set: plain axis-aligned rectangle.
        for (const auto &corner : corners)
        {
            obb_points.push_back(corner[0]);
            obb_points.push_back(corner[1]);
        }
        return obb_points;
    }

    // angle is stored as a fraction of PI (see the `angle` member).
    const float angle_rad = angle * M_PI;
    const float c = cosf(angle_rad);
    const float s = sinf(angle_rad);

    // Keep the center in floating point: integer division would shift it by
    // half a pixel whenever w or h is odd.
    const float cx = x + w * 0.5f;
    const float cy = y + h * 0.5f;

    for (const auto &corner : corners)
    {
        const float dx = corner[0] - cx;
        const float dy = corner[1] - cy;
        // Round to nearest instead of truncating: float error in cosf/sinf can
        // produce e.g. 1.9999999, which truncation would turn into 1.
        obb_points.push_back(static_cast<int>(lroundf(c * dx - s * dy + cx)));
        obb_points.push_back(static_cast<int>(lroundf(s * dx + c * dy + cy)));
    }
    return obb_points;
}

/**
Expand Down Expand Up @@ -93,6 +142,12 @@ namespace maix::nn
*/
std::vector<int> points;

/**
* Rotation angle; -9999 means not set. The value is a fraction of a half turn: multiply by 180 to get the angle in degrees, or by PI to get radians.
* @maixpy maix.nn.Object.angle
*/
float angle;

/**
* segmentation mask, uint8 list type, shape is h * w but flattened to one dimension, value from 0 to 255.
* @attention For efficiency, it's a pointer in C++, use this carefully!
Expand Down Expand Up @@ -124,8 +179,8 @@ namespace maix::nn
* @maixpy maix.nn.ObjectFloat.__init__
* @maixcdk maix.nn.ObjectFloat.ObjectFloat
*/
ObjectFloat(float x = 0, float y = 0, float w = 0, float h = 0, float class_id = 0, float score = 0, std::vector<float> points = std::vector<float>())
: x(x), y(y), w(w), h(h), class_id(class_id), score(score), points(points), temp(NULL)
ObjectFloat(float x = 0, float y = 0, float w = 0, float h = 0, float class_id = 0, float score = 0, std::vector<float> points = std::vector<float>(), float angle = -1)
: x(x), y(y), w(w), h(h), class_id(class_id), score(score), points(points), angle(angle), temp(NULL)
{
}

Expand All @@ -141,7 +196,10 @@ namespace maix::nn
*/
std::string to_str()
{
return "x: " + std::to_string(x) + ", y: " + std::to_string(y) + ", w: " + std::to_string(w) + ", h: " + std::to_string(h) + ", class_id: " + std::to_string(class_id) + ", score: " + std::to_string(score);
if(angle >= 0)
return "x: " + std::to_string(x) + ", y: " + std::to_string(y) + ", w: " + std::to_string(w) + ", h: " + std::to_string(h) + ", class_id: " + std::to_string(class_id) + ", score: " + std::to_string(score) + ", angle: " + std::to_string(angle);
else
return "x: " + std::to_string(x) + ", y: " + std::to_string(y) + ", w: " + std::to_string(w) + ", h: " + std::to_string(h) + ", class_id: " + std::to_string(class_id) + ", score: " + std::to_string(score);
}

/**
Expand Down Expand Up @@ -186,6 +244,12 @@ namespace maix::nn
*/
std::vector<float> points;

/**
* Rotate angle
* @maixpy maix.nn.ObjectFloat.angle
*/
float angle;

/**
* For temporary usage, not for MaixPy API
*/
Expand Down Expand Up @@ -226,9 +290,9 @@ namespace maix::nn
* @throw Throw exception if no memory
* @maixpy maix.nn.Objects.add
*/
nn::Object &add(int x = 0, int y = 0, int w = 0, int h = 0, int class_id = 0, float score = 0, std::vector<int> points = std::vector<int>())
nn::Object &add(int x = 0, int y = 0, int w = 0, int h = 0, int class_id = 0, float score = 0, std::vector<int> points = std::vector<int>(), float angle = -1)
{
Object *obj = new Object(x, y, w, h, class_id, score, points);
Object *obj = new Object(x, y, w, h, class_id, score, points, angle);
if(!obj)
throw err::Exception(err::ERR_NO_MEM);
obj->seg_mask = NULL;
Expand Down
124 changes: 98 additions & 26 deletions components/nn/include/maix_nn_yolo11.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
#include "maix_image.hpp"
#include "maix_nn_F.hpp"
#include "maix_nn_object.hpp"
#include <math.h>

namespace maix::nn
{
enum class YOLO11_Type
{
DETECT = 0,
POSE = 1,
SEG = 2
SEG = 2,
OBB = 3
};

class _KpInfoYolo11
Expand Down Expand Up @@ -207,6 +209,10 @@ namespace maix::nn
{
_type = YOLO11_Type::SEG;
}
else if (_extra_info["type"] == "obb")
{
_type = YOLO11_Type::OBB;
}
else if (_extra_info["type"] != "detector")
{
log::error("type [%s] not support, suport detector and pose", _extra_info["type"].c_str());
Expand Down Expand Up @@ -305,11 +311,14 @@ namespace maix::nn
* @param points keypoints, int list type, [x, y, x, y ...]
* @param radius radius of points.
* @param color color of points.
* @param colors assign colors for points, list type, element is image.Color object.
* @param body if true and points' length is 17*2, lines are drawn between points as a human body; if false, no body lines are drawn. Default true.
* @param close connect all points to close a polygon, default false.
* @maixpy maix.nn.YOLO11.draw_pose
*/
void draw_pose(image::Image &img, std::vector<int> points, int radius = 4, image::Color color = image::COLOR_RED, bool body = true)
void draw_pose(image::Image &img, std::vector<int> points, int radius = 4, image::Color color = image::COLOR_RED, const std::vector<image::Color> &colors = std::vector<image::Color>(), bool body = true, bool close = false)
{
bool line_drawed = false;
if (points.size() < 2 || points.size() % 2 != 0)
{
throw std::runtime_error("keypoints size must >= 2 and multiple of 2");
Expand All @@ -332,14 +341,33 @@ namespace maix::nn
int y = (points[5 * 2 + 1] + points[6 * 2 + 1]) / 2;
if (!(points[5 * 2] < 0 || points[5 * 2 + 1] < 0 || points[6 * 2] < 0 || points[6 * 2 + 1] < 0 || x < 0 || y < 0 || points[0] < 0 || points[1] < 0))
img.draw_line(points[0], points[1], x, y, color, 2);
line_drawed = true;
}
for (size_t i = 0; i < points.size() / 2; ++i)
{
int x = points[i * 2];
int y = points[i * 2 + 1];
if (x < 0 || y < 0)
continue;
img.draw_circle(x, y, radius, color, -1);
auto &_color = color;
if (colors.size() > i)
{
_color = colors[i];
}
img.draw_circle(x, y, radius, _color, -1);
}
if(close && !line_drawed)
{
for (size_t i = 0; i < points.size() / 2; ++i)
{
int x1 = points[i * 2];
int y1 = points[i * 2 + 1];
int x2 = points[(i + 1) % (points.size() / 2) * 2];
int y2 = points[(i + 1) % (points.size() / 2) * 2 + 1];
if (x1 < 0 || y1 < 0 || x2 < 0 || y2 < 0)
continue;
img.draw_line(x1, y1, x2, y2, color, 2);
}
}
}

Expand Down Expand Up @@ -485,11 +513,11 @@ namespace maix::nn
tensor::Tensor *box_out = NULL; // shape 1, 1, 4, 8400
for (auto i : *outputs)
{
if (i.second->shape()[2] == 4)
if (i.second->shape()[2] == 4 && !box_out)
{
box_out = i.second;
}
else if (strstr(i.first.c_str(), "Sigmoid") != NULL)
else if (strstr(i.first.c_str(), "Sigmoid") != NULL && !score_out)
{
score_out = i.second;
if((size_t)score_out->shape()[1] != labels.size())
Expand All @@ -507,7 +535,7 @@ namespace maix::nn
*kp_out = i.second;
}
}
if (!score_out || !kp_out)
if (!score_out || !box_out)
{
throw err::Exception(err::ERR_ARGS, "model output not valid");
}
Expand All @@ -520,30 +548,74 @@ namespace maix::nn
0,
(int)(h / stride[0] * w / stride[0]),
(int)(h / stride[0] * w / stride[0] + h / stride[1] * w / stride[1])};
for (int i = 0; i < 3; i++)
if (_type == YOLO11_Type::OBB)
{
float *angle_ptr = (float *)(*kp_out)->data();
for (int i = 0; i < 3; i++)
{
int nh = h / stride[i];
int nw = w / stride[i];
for (int ay = 0; ay < nh; ++ay)
{
for (int ax = 0; ax < nw; ++ax)
{
int offset = idx_start[i] + ay * nw + ax;
int class_id = _argmax(scores_ptr + offset, class_num, total_box_num);
// int max_idx = _argmax2(scores_ptr + offset, class_num * total_box_num - offset, total_box_num);
// int class_id = (offset + max_idx) / total_box_num;
float obj_score = scores_ptr[offset + class_id * total_box_num];
if (obj_score <= conf_thresh)
{
continue;
}
float angle = (angle_ptr[offset] - 0.25);
float angle_rad = angle * M_PI;
float cos_angle = cosf(angle_rad);
float sin_angle = sinf(angle_rad);
float lt_x = dets_ptr[offset];
float lt_y = dets_ptr[offset + total_box_num];
float rb_x = dets_ptr[offset + total_box_num * 2];
float rb_y = dets_ptr[offset + total_box_num * 3];
float xf = (rb_x - lt_x) / 2.0;
float yf = (rb_y - lt_y) / 2.0;
float bbox_w = (lt_x + rb_x) * stride[i];
float bbox_h = (lt_y + rb_y) * stride[i];
float bbox_x = ((xf * cos_angle - yf * sin_angle) + ax + 0.5) * stride[i] - bbox_w * 0.5;
float bbox_y = ((xf * sin_angle + yf * cos_angle) + ay + 0.5) * stride[i] - bbox_h * 0.5;
_KpInfoYolo11 *kp_info = new _KpInfoYolo11(offset, ax, ay, stride[i]);
Object &obj = objs.add(bbox_x, bbox_y, bbox_w, bbox_h, class_id, obj_score, {}, angle);
obj.temp = (void *)kp_info;
}
}
}
}
else
{
int nh = h / stride[i];
int nw = w / stride[i];
for (int ay = 0; ay < nh; ++ay)
for (int i = 0; i < 3; i++)
{
for (int ax = 0; ax < nw; ++ax)
int nh = h / stride[i];
int nw = w / stride[i];
for (int ay = 0; ay < nh; ++ay)
{
int offset = idx_start[i] + ay * nw + ax;
int class_id = _argmax(scores_ptr + offset, class_num, total_box_num);
// int max_idx = _argmax2(scores_ptr + offset, class_num * total_box_num - offset, total_box_num);
// int class_id = (offset + max_idx) / total_box_num;
float obj_score = scores_ptr[offset + class_id * total_box_num];
if (obj_score <= conf_thresh)
for (int ax = 0; ax < nw; ++ax)
{
continue;
int offset = idx_start[i] + ay * nw + ax;
int class_id = _argmax(scores_ptr + offset, class_num, total_box_num);
// int max_idx = _argmax2(scores_ptr + offset, class_num * total_box_num - offset, total_box_num);
// int class_id = (offset + max_idx) / total_box_num;
float obj_score = scores_ptr[offset + class_id * total_box_num];
if (obj_score <= conf_thresh)
{
continue;
}
float bbox_x = (ax + 0.5 - dets_ptr[offset]) * stride[i];
float bbox_y = (ay + 0.5 - dets_ptr[offset + total_box_num]) * stride[i];
float bbox_w = (ax + 0.5 + dets_ptr[offset + total_box_num * 2]) * stride[i] - bbox_x;
float bbox_h = (ay + 0.5 + dets_ptr[offset + total_box_num * 3]) * stride[i] - bbox_y;
_KpInfoYolo11 *kp_info = new _KpInfoYolo11(offset, ax, ay, stride[i]);
Object &obj = objs.add(bbox_x, bbox_y, bbox_w, bbox_h, class_id, obj_score);
obj.temp = (void *)kp_info;
}
float bbox_x = (ax + 0.5 - dets_ptr[offset]) * stride[i];
float bbox_y = (ay + 0.5 - dets_ptr[offset + total_box_num]) * stride[i];
float bbox_w = (ax + 0.5 + dets_ptr[offset + total_box_num * 2]) * stride[i] - bbox_x;
float bbox_h = (ay + 0.5 + dets_ptr[offset + total_box_num * 3]) * stride[i] - bbox_y;
_KpInfoYolo11 *kp_info = new _KpInfoYolo11(offset, ax, ay, stride[i]);
Object &obj = objs.add(bbox_x, bbox_y, bbox_w, bbox_h, class_id, obj_score);
obj.temp = (void *)kp_info;
}
}
}
Expand Down Expand Up @@ -575,7 +647,7 @@ namespace maix::nn
{
if (a->score != 0)
{
Object &obj = result->add(a->x, a->y, a->w, a->h, a->class_id, a->score, a->points);
Object &obj = result->add(a->x, a->y, a->w, a->h, a->class_id, a->score, a->points, a->angle);
if (obj.x < 0)
{
obj.w += obj.x;
Expand Down
Loading

0 comments on commit ec9a6c4

Please sign in to comment.