/* demo.cpp */
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
using namespace std;
/* --------------------------------------------------------------------------------------- */
/* Description: checks if a cropped ear image is left oriented */
/* Parameters: */
/* Mat img: 96x96 image with a cropped ear */
/* - left or right oriented */
/* - with or without pose variation */
/* Return: true if ear is left oriented, false otherwise */
/* --------------------------------------------------------------------------------------- */
bool isLeft(Mat img) {
static bool first = true;
static Net net;
if(first) {
first = false;
net = readNetFromTensorflow("model-side.pb");
if(net.empty()) {
cerr << "ERROR: Could not load the CNN for side classification" << endl;
exit(1);
}
}
Mat inputBlob = blobFromImage(img);
inputBlob /= 255.0;
net.setInput(inputBlob, "Placeholder");
Mat result = net.forward("side_/out/MatMul");
return result.at<float>(0,0) > result.at<float>(0,1);
}
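/* --------------------------------------------------------------------------------------- */
/* Usage sketch (an illustration added for clarity, not part of the original pipeline): */
/* isLeft expects a 96x96 grayscale crop, so an arbitrary input should be resized first; */
/* the helper name and file name are hypothetical */
/* --------------------------------------------------------------------------------------- */
static bool isLeftFromFile(const string &filename) {
    Mat ear = imread(filename, IMREAD_GRAYSCALE);   /* load as a single-channel image */
    resize(ear, ear, Size(96, 96));                 /* match the CNN input size */
    return isLeft(ear);
}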
/* --------------------------------------------------------------------------------------- */
/* Description: detects landmarks in a left oriented cropped ear image */
/* Parameters: */
/* Mat img: 96x96 image with a cropped ear */
/* - left oriented */
/* - with or without pose variation */
/* vector<Point2d> &ldmk: vector that receives the 2d coordinates of 55 landmarks */
/* stage s: FIRST locates landmarks in ears with large pose variations */
/* SECOND refines landmarks in ears after coarse pose normalization */
/* Return: none */
/* --------------------------------------------------------------------------------------- */
typedef enum {FIRST, SECOND} stage;
void detectLandmarks(Mat img, vector<Point2d> &ldmk, stage s) {
static bool first = true;
static Net net1, net2;
if(first) {
first = false;
net1 = readNetFromTensorflow("model-stage1.pb");
net2 = readNetFromTensorflow("model-stage2.pb");
if(net1.empty() || net2.empty()) {
cerr << "ERROR: Could not load the CNNs for landmark detection" << endl;
exit(1);
}
}
Mat result, inputBlob = blobFromImage(img);
inputBlob /= 255.0;
if(s == FIRST) {
net1.setInput(inputBlob);
result = net1.forward("ear_ang45_3_sca20_r_tra20_r_e/out/MatMul");
}
else {
net2.setInput(inputBlob);
result = net2.forward("ear_ang45_3_sca20_r_tra20_r_e/out/MatMul");
}
/* map normalized network outputs from [-1,1] to pixel coordinates in the 96x96 image */
result *= 48;
result += 48;
ldmk.clear();
for(int i=0; i < 55; i++)
ldmk.push_back(Point2d(result.at<float>(0,i*2), result.at<float>(0,i*2+1)));
}
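/* --------------------------------------------------------------------------------------- */
/* Visualization sketch (an illustration added for clarity, not part of the original */
/* pipeline): draws the 55 detected landmarks onto a copy of the 96x96 crop for inspection */
/* --------------------------------------------------------------------------------------- */
static Mat drawLandmarks(Mat img, stage s) {
    vector<Point2d> ldmk;
    detectLandmarks(img, ldmk, s);
    Mat vis;
    cvtColor(img, vis, COLOR_GRAY2BGR);             /* convert to color so the dots are visible */
    for(size_t i=0; i < ldmk.size(); i++)
        circle(vis, Point(cvRound(ldmk[i].x), cvRound(ldmk[i].y)), 1, Scalar(0, 0, 255), -1);
    return vis;
}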
/* --------------------------------------------------------------------------------------- */
/* Description: extracts CNN descriptor from a left oriented, pose normalized ear image */
/* Parameters: */
/* Mat img: 128x128 image with a normalized ear */
/* - left oriented */
/* - without pose variation */
/* - ear height is equal to ear width */
/* Return: Mat with a 512-dimensional descriptor */
/* --------------------------------------------------------------------------------------- */
Mat extractDescriptor(Mat img) {
static bool first = true;
static Net net;
if(first) {
first = false;
net = readNetFromTensorflow("model-descriptor.pb");
if(net.empty()) {
cerr << "ERROR: Could not load the CNN for side classification" << endl;
exit(1);
}
}
Mat inputBlob = blobFromImage(img);
inputBlob /= 255.0;
net.setInput(inputBlob);
Mat result = net.forward("MatMul");
return result.clone();
}
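/* --------------------------------------------------------------------------------------- */
/* Matching sketch (an assumption about intended use, not part of the original demo): */
/* descriptors can be compared with the Euclidean distance, where a smaller distance */
/* suggests that two normalized ear images belong to the same person, e.g. */
/* descriptorDistance(extractDescriptor(a), extractDescriptor(b)) */
/* --------------------------------------------------------------------------------------- */
static double descriptorDistance(Mat a, Mat b) {
    return norm(a, b, NORM_L2);                     /* Euclidean distance between 512-d rows */
}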
/* --------------------------------------------------------------------------------------- */
/* Description: interpolates the ear image using the ear center, orientation and scale */
/* Parameters: */
/* Mat image: image with any size of a cropped ear */
/* Mat &output: output interpolated image */
/* int size: output dimensions are size x size */
/* double scaley: vertical radius of the ear in the input image (in pixels) */
/* double scalex: horizontal radius of the ear in the input image (in pixels) */
/* double ang: ear rotation in radians */
/* double cx: center of the ear in the horizontal axis */
/* double cy: center of the ear in the vertical axis */
/* Return: none */
/* --------------------------------------------------------------------------------------- */
void normalizeImage(Mat image, Mat &output, int size, double scaley, double scalex, double ang, double cx, double cy) {
output.create(size, size, CV_8UC1);
double ratioy = (scaley/((size-1)/2.0)), ratiox = (scalex/((size-1)/2.0));
for(int i=0; i < size; i++)
for(int j=0; j < size; j++) {
/* map output pixel (j,i) back to input image coordinates via scaling and rotation */
double xt = ratiox*(j-(size-1)/2.0), yt = ratioy*(i-(size-1)/2.0);
double x = xt*cos(ang)-yt*sin(ang)+cx, y = xt*sin(ang)+yt*cos(ang)+cy;
/* bilinear interpolation between the four nearest neighbors, clamped to the image border */
int u = x, v = y;
double ul = x-u, vl = y-v;
int u1 = u+1, v1 = v+1;
u = max(0, min(image.cols-1, u));
u1 = max(0, min(image.cols-1, u1));
v = max(0, min(image.rows-1, v));
v1 = max(0, min(image.rows-1, v1));
double tmp = image.at<uchar>(v,u)*(1.0-ul)*(1.0-vl) + image.at<uchar>(v,u1)*ul*(1.0-vl)
+ image.at<uchar>(v1,u)*(1.0-ul)*vl + image.at<uchar>(v1,u1)*ul*vl;
output.at<uchar>(i,j) = max(0.0, min(255.0, tmp));
}
}
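/* --------------------------------------------------------------------------------------- */
/* Equivalent sketch (an alternative formulation, not the author's implementation): the */
/* same mapping can be written as a single inverse affine transform and delegated to */
/* OpenCV's warpAffine; results match normalizeImage up to rounding differences */
/* --------------------------------------------------------------------------------------- */
static void normalizeImageWarp(Mat image, Mat &output, int size, double scaley, double scalex, double ang, double cx, double cy) {
    double c = (size-1)/2.0, rx = scalex/c, ry = scaley/c;
    /* 2x3 matrix mapping an output pixel (j,i) to input coordinates (x,y) */
    Mat M = (Mat_<double>(2, 3) << rx*cos(ang), -ry*sin(ang), cx - c*(rx*cos(ang) - ry*sin(ang)),
                                   rx*sin(ang), ry*cos(ang), cy - c*(rx*sin(ang) + ry*cos(ang)));
    warpAffine(image, output, M, Size(size, size), INTER_LINEAR | WARP_INVERSE_MAP, BORDER_REPLICATE);
}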
/* --------------------------------------------------------------------------------------- */
/* Description: adjusts the pose parameters using landmarks */
/* Parameters: */
/* vector<Point2d> ldmk: 2d coordinates of 55 landmarks */
/* int size: size of the interpolated image from which the landmarks were extracted */
/* double &scaley: vertical radius of the ear in the input image (in pixels) */
/* double &scalex: horizontal radius of the ear in the input image (in pixels) */
/* double &ang: ear rotation in radians */
/* double &cx: center of the ear in the horizontal axis */
/* double &cy: center of the ear in the vertical axis */
/* bool oriented: true if new center must use oriented bounding box, false otherwise */
/* bool scaled: true if interpolated image was generated using different values for */
/* scaley and scalex, false otherwise */
/* Observation: scaley, scalex, ang, cx and cy must contain the values used to generate */
/* the interpolated image, and will be overwritten with the new values */
/* Return: none */
/* --------------------------------------------------------------------------------------- */
void adjustParameters(vector<Point2d> ldmk, int size, double &scaley, double &scalex, double &ang, double &cx, double &cy, bool oriented, bool scaled) {
double ratioy = (scaley/((size-1)/2.0)), ratiox = ((scaled ? scalex : scaley)/((size-1)/2.0));
/* align landmark coordinate space to the original image */
/* compute principal components and bounding box for aligned landmarks */
Mat data_pts = Mat(55, 2, CV_64FC1);
for(int i=0; i < 55; i++) {
double xt = ratiox*(ldmk[i].x-(size-1)/2.0), yt = ratioy*(ldmk[i].y-(size-1)/2.0);
data_pts.at<double>(i, 0) = xt*cos(ang)-yt*sin(ang)+cx;
data_pts.at<double>(i, 1) = xt*sin(ang)+yt*cos(ang)+cy;
}
PCA pca_analysis(data_pts, Mat(), PCA::DATA_AS_ROW);
/* set orientation of the ear as the direction of the first principal component */
double angle = atan2(pca_analysis.eigenvectors.at<double>(0, 1), pca_analysis.eigenvectors.at<double>(0, 0));
while(angle < 0.0)
angle += 2.0*M_PI;
if(angle > M_PI)
angle -= M_PI;
ang = angle-M_PI/2.0;
/* set each scale as two times the standard deviation along its principal component */
scaley = 2.0*sqrt(pca_analysis.eigenvalues.at<double>(0, 0));
scalex = 2.0*sqrt(pca_analysis.eigenvalues.at<double>(1, 0)); /* eigenvalues is a column vector */
if(!oriented) {
/* set center as the center of the bounding box */
Point2d tl = {DBL_MAX, DBL_MAX}, br = {-DBL_MAX, -DBL_MAX};
for(int i=0; i < 55; i++) {
tl.x = min(tl.x, data_pts.at<double>(i, 0));
tl.y = min(tl.y, data_pts.at<double>(i, 1));
br.x = max(br.x, data_pts.at<double>(i, 0));
br.y = max(br.y, data_pts.at<double>(i, 1));
}
cx = (tl.x+br.x)/2.0;
cy = (tl.y+br.y)/2.0;
}
else {
/* set center as the center of the oriented bounding box */
Mat norm_pts = pca_analysis.project(data_pts);
double top=-DBL_MAX, bottom=DBL_MAX, left=DBL_MAX, right=-DBL_MAX;
for(int i=0; i < 55; i++) {
top = max(top, norm_pts.at<double>(i, 0));
bottom = min(bottom, norm_pts.at<double>(i, 0));
right = max(right, norm_pts.at<double>(i, 1));
left = min(left, norm_pts.at<double>(i, 1));
}
cx = (bottom+top)/2.0*pca_analysis.eigenvectors.at<double>(0, 0)
+ (left+right)/2.0*pca_analysis.eigenvectors.at<double>(1, 0) + pca_analysis.mean.at<double>(0, 0);
cy = (bottom+top)/2.0*pca_analysis.eigenvectors.at<double>(0, 1)
+ (left+right)/2.0*pca_analysis.eigenvectors.at<double>(1, 1) + pca_analysis.mean.at<double>(0, 1);
}
}
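/* --------------------------------------------------------------------------------------- */
/* Inspection sketch (an illustration added for clarity, not part of the original */
/* pipeline): the adjusted parameters describe an ellipse around the ear, which can be */
/* drawn onto the input image for debugging */
/* --------------------------------------------------------------------------------------- */
static void drawEarEllipse(Mat &img, double scaley, double scalex, double ang, double cx, double cy) {
    ellipse(img, Point(cvRound(cx), cvRound(cy)), Size(cvRound(scalex), cvRound(scaley)),
            ang*180.0/M_PI, 0, 360, Scalar(255), 1); /* cv::ellipse expects degrees */
}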
/* main pipeline: classify ear side, detect landmarks in two stages, normalize pose and extract the descriptor */
int main(int argc, char **argv) {
/* check if an image name was provided as argument */
if(argc != 2) {
cerr << "Usage:" << argv[0] << " filename.{png|jpg|...}" << endl;
return 1;
}
/* load cropped ear image */
Mat image = imread(argv[1], IMREAD_GRAYSCALE), interpolated;
if(image.empty()) {
cerr << "ERROR: Could not read image " << argv[1] << endl;
return 1;
}
/* image size for side classification and landmark detection */
const int PREPROC_SIZE = 96;
/* image size for CNN description */
const int DESCRIPT_SIZE = 128;
/* use half of the largest image dimension as the initial scale */
double scaley = (max(image.rows, image.cols)-1.0)/2.0, scalex;
/* initial orientation unknown */
double ang = 0.0;
/* use center of the image as initial ear center */
double cx = (image.cols-1.0)/2.0, cy = (image.rows-1.0)/2.0;
/* vector for 2d coordinates of ear landmarks */
vector<Point2d> landmarks;
/* normalize image using initial guesses for ear location, size and orientation */
/* use the same scale for x and y axes */
normalizeImage(image, interpolated, PREPROC_SIZE, scaley, scaley, ang, cx, cy);
/* check if the ear is left-oriented and flip it if it is not */
if(!isLeft(interpolated)) {
Mat tmp;
flip(interpolated, tmp, 1);
tmp.copyTo(interpolated);
flip(image, tmp, 1);
tmp.copyTo(image);
}
/* detect landmarks using stage 1 (robust to large pose variations) */
detectLandmarks(interpolated, landmarks, FIRST);
/* normalize image using adjusted parameters */
/* use the same scale for x and y axes */
adjustParameters(landmarks, PREPROC_SIZE, scaley, scalex, ang, cx, cy, false, false);
normalizeImage(image, interpolated, PREPROC_SIZE, scaley, scaley, ang, cx, cy);
/* detect landmarks using stage 2 (fine-tuned for small variations) */
detectLandmarks(interpolated, landmarks, SECOND);
/* normalize image using adjusted parameters */
/* use different scales for x and y axes */
adjustParameters(landmarks, PREPROC_SIZE, scaley, scalex, ang, cx, cy, true, false);
normalizeImage(image, interpolated, DESCRIPT_SIZE, scaley, scalex, ang, cx, cy);
/* extract discriminant descriptor */
Mat descriptor = extractDescriptor(interpolated);
/* save normalized image and CNN descriptor */
imwrite("output.png", interpolated);
ofstream fp("output.txt", ofstream::out);
for(int i=0; i < 512; i++)
fp << descriptor.at<float>(0,i) << " ";
fp << endl;
fp.close();
return 0;
}
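/* --------------------------------------------------------------------------------------- */
/* Round-trip sketch (an assumption, not part of the original demo): a descriptor saved */
/* in output.txt can be reloaded later and compared with descriptorDistance above */
/* --------------------------------------------------------------------------------------- */
static Mat loadDescriptor(const string &filename) {
    Mat d(1, 512, CV_32FC1);
    ifstream fp(filename);
    for(int i=0; i < 512; i++)
        fp >> d.at<float>(0, i);
    return d;
}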