// This example is derived from the ssd_mobilenet_object_detection opencv demo
// and adapted to be used with Intel RealSense Cameras
// Please see https://github.com/opencv/opencv/blob/master/LICENSE

#include <opencv2/dnn.hpp>
#include <librealsense2/rs.hpp>
#include "../cv-helpers.hpp"

// Standard headers for the label text and error reporting below
#include <iomanip>
#include <iostream>
#include <sstream>

const size_t inWidth = 300;
const size_t inHeight = 300;
const float WHRatio = inWidth / (float)inHeight;
const float inScaleFactor = 0.007843f;
const float meanVal = 127.5;
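
// cv::dnn::blobFromImage (used below) subtracts meanVal from every pixel and
// then multiplies by inScaleFactor (0.007843 ~= 1/127.5), mapping the 0..255
// byte range to roughly [-1, 1], the input normalization MobileNet-SSD
// was trained with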
const char* classNames[] = {"background",
                            "aeroplane", "bicycle", "bird", "boat",
                            "bottle", "bus", "car", "cat", "chair",
                            "cow", "diningtable", "dog", "horse",
                            "motorbike", "person", "pottedplant",
                            "sheep", "sofa", "train", "tvmonitor"};

int main(int argc, char** argv) try
{
    using namespace cv;
    using namespace cv::dnn;
    using namespace rs2;
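
    // Note: readNetFromCaffe resolves these relative paths against the current
    // working directory, so the model files must be present where the sample runs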
    Net net = readNetFromCaffe("MobileNetSSD_deploy.prototxt",
                               "MobileNetSSD_deploy.caffemodel");

    // Start streaming from Intel RealSense Camera
    pipeline pipe;
    auto config = pipe.start();
    auto profile = config.get_stream(RS2_STREAM_COLOR)
                         .as<video_stream_profile>();
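
    // rs2::align re-projects the depth frame onto the color stream's viewport,
    // so that a pixel (x, y) addresses the same point in both aligned images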
    rs2::align align_to(RS2_STREAM_COLOR);
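
    // Choose a centered crop with the network's square input aspect ratio
    // (WHRatio) so the displayed color and depth frames match the detections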
    Size cropSize;
    if (profile.width() / (float)profile.height() > WHRatio)
    {
        cropSize = Size(static_cast<int>(profile.height() * WHRatio),
                        profile.height());
    }
    else
    {
        cropSize = Size(profile.width(),
                        static_cast<int>(profile.width() / WHRatio));
    }

    Rect crop(Point((profile.width() - cropSize.width) / 2,
                    (profile.height() - cropSize.height) / 2),
              cropSize);

    const auto window_name = "Display Image";
    namedWindow(window_name, WINDOW_AUTOSIZE);
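
    // Keep looping until the window is closed; getWindowProperty returns a
    // negative value once the window has been destroyed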
    while (getWindowProperty(window_name, WND_PROP_AUTOSIZE) >= 0)
    {
        // Wait for the next set of frames
        auto data = pipe.wait_for_frames();
        // Make sure the frames are spatially aligned
        data = align_to.process(data);

        auto color_frame = data.get_color_frame();
        auto depth_frame = data.get_depth_frame();

        // If we only received a new depth frame, but the color frame
        // did not update, continue without running the network again
        static int last_frame_number = 0;
        if (color_frame.get_frame_number() == last_frame_number) continue;
        last_frame_number = static_cast<int>(color_frame.get_frame_number());

        // Convert RealSense frame to OpenCV matrix:
        auto color_mat = frame_to_mat(color_frame);
        auto depth_mat = depth_frame_to_meters(depth_frame);
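        // depth_mat holds the per-pixel distance in meters
        // (see depth_frame_to_meters in ../cv-helpers.hpp)
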
        Mat inputBlob = blobFromImage(color_mat, inScaleFactor,
                                      Size(inWidth, inHeight), meanVal, false); // Convert Mat to batch of images
        net.setInput(inputBlob, "data");              // set the network input
        Mat detection = net.forward("detection_out"); // compute output
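
        // The SSD "detection_out" blob has shape 1x1xNx7; each of the N rows is
        // [image_id, class_id, confidence, x_min, y_min, x_max, y_max], with box
        // coordinates normalized to [0, 1]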
        Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

        // Crop both color and depth frames
        color_mat = color_mat(crop);
        depth_mat = depth_mat(crop);

        float confidenceThreshold = 0.8f;
        for (int i = 0; i < detectionMat.rows; i++)
        {
            float confidence = detectionMat.at<float>(i, 2);
            if (confidence > confidenceThreshold)
            {
                size_t objectClass = (size_t)(detectionMat.at<float>(i, 1));

                int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * color_mat.cols);
                int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * color_mat.rows);
                int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * color_mat.cols);
                int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * color_mat.rows);

                Rect object((int)xLeftBottom, (int)yLeftBottom,
                            (int)(xRightTop - xLeftBottom),
                            (int)(yRightTop - yLeftBottom));
                object = object & Rect(0, 0, depth_mat.cols, depth_mat.rows);
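                // Intersecting with the full frame rectangle clamps the box,
                // so depth_mat(object) below cannot index outside the image
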
                // Calculate mean depth inside the detection region.
                // This is a very naive way to estimate an object's depth,
                // but it is intended to demonstrate how one might
                // use depth data in general
                Scalar m = mean(depth_mat(object));

                std::ostringstream ss;
                ss << classNames[objectClass] << " ";
                ss << std::setprecision(2) << m[0] << " meters away";
                String conf(ss.str());

                rectangle(color_mat, object, Scalar(0, 255, 0));
                int baseLine = 0;
                Size labelSize = getTextSize(ss.str(), FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
                auto center = (object.br() + object.tl()) * 0.5;
                center.x = center.x - labelSize.width / 2;
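
                // Draw a filled white box behind the label so the black text
                // stays readable on any background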
                rectangle(color_mat, Rect(Point(center.x, center.y - labelSize.height),
                                          Size(labelSize.width, labelSize.height + baseLine)),
                          Scalar(255, 255, 255), FILLED);
                putText(color_mat, ss.str(), center,
                        FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
            }
        }

        imshow(window_name, color_mat);
        if (waitKey(1) >= 0) break;
    }

    return EXIT_SUCCESS;
}
catch (const rs2::error & e)
{
    std::cerr << "RealSense error calling " << e.get_failed_function()
              << "(" << e.get_failed_args() << "):\n " << e.what() << std::endl;
    return EXIT_FAILURE;
}
catch (const std::exception& e)
{
    std::cerr << e.what() << std::endl;
    return EXIT_FAILURE;
}