boxes.py
from itertools import product
import numpy as np
def calculate_intersection_over_union(box_data, prior_boxes):
"""Calculate intersection over union of box_data with respect to
prior_boxes.
Arguments:
        box_data: numpy array with shape (4) indicating x_min, y_min,
        x_max and y_max coordinates of the bounding box.
prior_boxes: numpy array with shape (num_boxes, 4).
Returns:
intersections_over_unions: numpy array with shape (num_boxes) which
corresponds to the intersection over unions of box_data with respect
to all prior_boxes.
"""
x_min = box_data[0]
y_min = box_data[1]
x_max = box_data[2]
y_max = box_data[3]
prior_boxes_x_min = prior_boxes[:, 0]
prior_boxes_y_min = prior_boxes[:, 1]
prior_boxes_x_max = prior_boxes[:, 2]
prior_boxes_y_max = prior_boxes[:, 3]
# calculating the intersection
intersections_x_min = np.maximum(prior_boxes_x_min, x_min)
intersections_y_min = np.maximum(prior_boxes_y_min, y_min)
intersections_x_max = np.minimum(prior_boxes_x_max, x_max)
intersections_y_max = np.minimum(prior_boxes_y_max, y_max)
intersected_widths = intersections_x_max - intersections_x_min
intersected_heights = intersections_y_max - intersections_y_min
intersected_widths = np.maximum(intersected_widths, 0)
intersected_heights = np.maximum(intersected_heights, 0)
intersections = intersected_widths * intersected_heights
# calculating the union
prior_box_widths = prior_boxes_x_max - prior_boxes_x_min
prior_box_heights = prior_boxes_y_max - prior_boxes_y_min
prior_box_areas = prior_box_widths * prior_box_heights
box_width = x_max - x_min
box_height = y_max - y_min
ground_truth_area = box_width * box_height
unions = prior_box_areas + ground_truth_area - intersections
intersection_over_union = intersections / unions
return intersection_over_union
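# Illustrative usage sketch, not part of the original module: the helper name
# and the box values below are hypothetical and only exercise the function
# defined above on hand-checked numbers.
def _example_intersection_over_union():
    box = np.array([0.0, 0.0, 0.5, 0.5])
    prior_boxes = np.array([[0.0, 0.0, 0.5, 0.5],
                            [0.25, 0.25, 0.75, 0.75]])
    # identical boxes give an IoU of 1.0; the shifted prior gives
    # 0.0625 / (0.25 + 0.25 - 0.0625), approximately 0.143
    return calculate_intersection_over_union(box, prior_boxes)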
def regress_boxes(assigned_prior_boxes, ground_truth_box, box_scale_factors):
    """Encode a ground truth box as offsets with respect to its assigned
    prior boxes (the regression targets used for localization).
    Arguments:
        assigned_prior_boxes: numpy array with shape (num_assigned_boxes, 4)
        indicating x_min, y_min, x_max and y_max of every assigned prior box.
        ground_truth_box: numpy array with shape (4) indicating
        x_min, y_min, x_max and y_max of the ground truth box.
        box_scale_factors: list or numpy array with 4 scale values
        which represent a scaling of the localization gradient.
    Returns:
        regressed_boxes: numpy array with shape (num_assigned_boxes, 4)
        containing the encoded center_x, center_y, width and height.
    """
x_min_scale, y_min_scale, x_max_scale, y_max_scale = box_scale_factors
assigned_prior_boxes = to_center_form(assigned_prior_boxes)
center_x_prior = assigned_prior_boxes[:, 0]
center_y_prior = assigned_prior_boxes[:, 1]
w_prior = assigned_prior_boxes[:, 2]
h_prior = assigned_prior_boxes[:, 3]
x_min = ground_truth_box[0]
y_min = ground_truth_box[1]
x_max = ground_truth_box[2]
y_max = ground_truth_box[3]
encoded_center_x = ((x_min + x_max) / 2.) - center_x_prior
encoded_center_x = encoded_center_x / (x_min_scale * w_prior)
encoded_center_y = ((y_min + y_max) / 2.) - center_y_prior
encoded_center_y = encoded_center_y / (y_min_scale * h_prior)
encoded_w = (x_max - x_min) / w_prior
encoded_w = np.log(encoded_w) / x_max_scale
encoded_h = (y_max - y_min) / h_prior
encoded_h = np.log(encoded_h) / y_max_scale
regressed_boxes = np.concatenate([encoded_center_x[:, None],
encoded_center_y[:, None],
encoded_w[:, None],
encoded_h[:, None]], axis=1)
return regressed_boxes
def unregress_boxes(predicted_box_data, prior_boxes,
                    box_scale_factors=[.1, .1, .2, .2]):
    """Decode predicted box offsets back into point-form coordinates
    (x_min, y_min, x_max, y_max) using the prior boxes; the inverse of
    regress_boxes.
    Arguments:
        predicted_box_data: numpy array with shape (num_boxes, 4 + extra)
        whose first four columns contain the encoded center_x, center_y,
        width and height; any extra columns are passed through unchanged.
        prior_boxes: numpy array with shape (num_boxes, 4).
        box_scale_factors: list or numpy array with 4 scale values.
    Returns:
        unregressed_boxes: numpy array with shape (num_boxes, 4 + extra)
        containing point-form coordinates clipped to the [0, 1] range.
    """
x_min_scale, y_min_scale, x_max_scale, y_max_scale = box_scale_factors
encoded_center_x = predicted_box_data[:, 0]
encoded_center_y = predicted_box_data[:, 1]
encoded_w = predicted_box_data[:, 2]
encoded_h = predicted_box_data[:, 3]
prior_boxes = to_center_form(prior_boxes)
center_x_prior = prior_boxes[:, 0]
center_y_prior = prior_boxes[:, 1]
w_prior = prior_boxes[:, 2]
h_prior = prior_boxes[:, 3]
    # note: until the conversion below, x_min/y_min hold the decoded box
    # centers and x_max/y_max hold the decoded widths and heights
    x_min = encoded_center_x * x_min_scale * w_prior
x_min = x_min + center_x_prior
y_min = encoded_center_y * y_min_scale * h_prior
y_min = y_min + center_y_prior
x_max = w_prior * np.exp(encoded_w * x_max_scale)
y_max = h_prior * np.exp(encoded_h * y_max_scale)
unregressed_boxes = np.concatenate([x_min[:, None], y_min[:, None],
x_max[:, None], y_max[:, None]],
axis=1)
    # convert the decoded boxes from center form to point form
    unregressed_boxes[:, :2] -= unregressed_boxes[:, 2:] / 2
unregressed_boxes[:, 2:] += unregressed_boxes[:, :2]
unregressed_boxes = np.clip(unregressed_boxes, 0.0, 1.0)
if predicted_box_data.shape[1] > 4:
unregressed_boxes = np.concatenate([unregressed_boxes,
predicted_box_data[:, 4:]], axis=-1)
return unregressed_boxes
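# Illustrative usage sketch, not part of the original module: a hypothetical
# round-trip check showing that unregress_boxes inverts regress_boxes for a
# box that lies inside the [0, 1] range.
def _example_encode_decode_round_trip():
    prior_boxes = np.array([[0.2, 0.2, 0.6, 0.6]])
    ground_truth_box = np.array([0.25, 0.25, 0.55, 0.65])
    box_scale_factors = [.1, .1, .2, .2]
    encoded = regress_boxes(prior_boxes, ground_truth_box, box_scale_factors)
    decoded = unregress_boxes(encoded, prior_boxes, box_scale_factors)
    # decoded[0] is approximately [0.25, 0.25, 0.55, 0.65] again
    return np.allclose(decoded[0], ground_truth_box)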
def to_point_form(boxes):
    """Convert boxes from center form (center_x, center_y, width, height)
    to point form (x_min, y_min, x_max, y_max).
    """
center_x = boxes[:, 0]
center_y = boxes[:, 1]
width = boxes[:, 2]
height = boxes[:, 3]
x_min = center_x - (width / 2.)
x_max = center_x + (width / 2.)
y_min = center_y - (height / 2.)
y_max = center_y + (height / 2.)
return np.concatenate([x_min[:, None], y_min[:, None],
x_max[:, None], y_max[:, None]], axis=1)
def to_center_form(boxes):
    """Convert boxes from point form (x_min, y_min, x_max, y_max)
    to center form (center_x, center_y, width, height).
    """
x_min = boxes[:, 0]
y_min = boxes[:, 1]
x_max = boxes[:, 2]
y_max = boxes[:, 3]
width = x_max - x_min
height = y_max - y_min
center_x = x_min + (width/2.)
center_y = y_max - (height/2.)
return np.concatenate([center_x[:, None], center_y[:, None],
width[:, None], height[:, None]], axis=1)
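# Illustrative usage sketch, not part of the original module: the two
# conversions above are inverses of each other for valid boxes.
def _example_box_form_round_trip():
    point_form_boxes = np.array([[0.1, 0.2, 0.5, 0.8]])
    center_form_boxes = to_center_form(point_form_boxes)
    # center_form_boxes is [[0.3, 0.5, 0.4, 0.6]]
    return np.allclose(to_point_form(center_form_boxes), point_form_boxes)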
def assign_prior_boxes_to_ground_truth(ground_truth_box, prior_boxes,
box_scale_factors=[.1, .1, .2, .2],
regress=True, overlap_threshold=.5,
return_iou=True):
""" Assigns and regresses prior boxes to a single ground_truth_box
data sample.
TODO: Change this function so that it does not regress the boxes
automatically. It should only assign them but not regress them!
    Arguments:
        ground_truth_box: numpy array with shape (4) indicating
        x_min, y_min, x_max and y_max of the ground truth box.
        prior_boxes: numpy array with shape (num_prior_boxes, 4)
        indicating x_min, y_min, x_max and y_max for every prior box.
        box_scale_factors: list or numpy array with 4 scale values
        which represent a scaling of the localization gradient.
        (https://github.com/weiliu89/caffe/issues/155)
    Returns:
        regressed_boxes: flattened numpy array with shape
        (num_prior_boxes * (4 + return_iou)) containing the regressed
        (encoded) values of the prior boxes assigned to the
        ground_truth_box; rows of unassigned prior boxes are zero.
    """
ious = calculate_intersection_over_union(ground_truth_box, prior_boxes)
regressed_boxes = np.zeros((len(prior_boxes), 4 + return_iou))
assign_mask = ious > overlap_threshold
if not assign_mask.any():
assign_mask[ious.argmax()] = True
if return_iou:
regressed_boxes[:, -1][assign_mask] = ious[assign_mask]
assigned_prior_boxes = prior_boxes[assign_mask]
if regress:
assigned_regressed_priors = regress_boxes(assigned_prior_boxes,
ground_truth_box,
box_scale_factors)
regressed_boxes[assign_mask, 0:4] = assigned_regressed_priors
return regressed_boxes.ravel()
else:
regressed_boxes[assign_mask, 0:4] = assigned_prior_boxes[:, 0:4]
return regressed_boxes.ravel()
# TODO: rename this function to 'match'
def assign_prior_boxes(prior_boxes, ground_truth_data, num_classes,
box_scale_factors=[.1, .1, .2, .2], regress=True,
overlap_threshold=.5, background_id=0):
""" Assign and regress prior boxes to all ground truth samples.
Arguments:
prior_boxes: numpy array with shape (num_prior_boxes, 4)
indicating x_min, y_min, x_max and y_max for every prior box.
ground_truth_data: numpy array with shape (num_samples, 4)
indicating x_min, y_min, x_max and y_max of the ground truth box.
box_scale_factors: numpy array with shape (num_boxes, 4)
Which represents a scaling of the localization gradient.
(https://github.com/weiliu89/caffe/issues/155)
Returns:
assignments: numpy array with shape
(num_samples, 4 + num_classes + 8)
which correspond to the regressed values of all
assigned_prior_boxes to the ground_truth_box
"""
assignments = np.zeros((len(prior_boxes), 4 + num_classes))
assignments[:, 4 + background_id] = 1.0
num_objects_in_image = len(ground_truth_data)
if num_objects_in_image == 0:
return assignments
encoded_boxes = np.apply_along_axis(assign_prior_boxes_to_ground_truth, 1,
ground_truth_data[:, :4], prior_boxes,
box_scale_factors, regress,
overlap_threshold)
encoded_boxes = encoded_boxes.reshape(-1, len(prior_boxes), 5)
best_iou = encoded_boxes[:, :, -1].max(axis=0)
best_iou_indices = encoded_boxes[:, :, -1].argmax(axis=0)
best_iou_mask = best_iou > 0
best_iou_indices = best_iou_indices[best_iou_mask]
num_assigned_boxes = len(best_iou_indices)
encoded_boxes = encoded_boxes[:, best_iou_mask, :]
box_sequence = np.arange(num_assigned_boxes)
assignments[best_iou_mask, :4] = encoded_boxes[best_iou_indices,
box_sequence, :4]
assignments[:, 4][best_iou_mask] = 0
assignments[:, 5:][best_iou_mask] = ground_truth_data[best_iou_indices,
5:]
return assignments
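# Illustrative usage sketch, not part of the original module: a hypothetical
# assignment with two prior boxes and two classes (background plus one object
# class). ground_truth_data follows the layout read by assign_prior_boxes:
# four box coordinates followed by one-hot class scores.
def _example_assign_prior_boxes():
    prior_boxes = np.array([[0.0, 0.0, 0.5, 0.5],
                            [0.5, 0.5, 1.0, 1.0]])
    ground_truth_data = np.array([[0.1, 0.1, 0.4, 0.4, 0.0, 1.0]])
    assignments = assign_prior_boxes(prior_boxes, ground_truth_data,
                                     num_classes=2)
    # the first prior overlaps the ground truth and receives its encoded
    # offsets and class; the second prior stays assigned to the background
    return assignments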
def create_prior_boxes(configuration=None):
    """Create prior (default) boxes in center form
    (center_x, center_y, width, height), normalized and clipped to [0, 1].
    Arguments:
        configuration: dictionary with the keys returned by
        get_configuration_file(); if None the default configuration is used.
    Returns:
        output: numpy array with shape (num_prior_boxes, 4).
    """
if configuration is None:
configuration = get_configuration_file()
image_size = configuration['image_size']
feature_map_sizes = configuration['feature_map_sizes']
min_sizes = configuration['min_sizes']
max_sizes = configuration['max_sizes']
steps = configuration['steps']
model_aspect_ratios = configuration['aspect_ratios']
mean = []
for feature_map_arg, feature_map_size in enumerate(feature_map_sizes):
step = steps[feature_map_arg]
min_size = min_sizes[feature_map_arg]
max_size = max_sizes[feature_map_arg]
aspect_ratios = model_aspect_ratios[feature_map_arg]
for y, x in product(range(feature_map_size), repeat=2):
f_k = image_size / step
center_x = (x + 0.5) / f_k
center_y = (y + 0.5) / f_k
s_k = min_size / image_size
mean = mean + [center_x, center_y, s_k, s_k]
s_k_prime = np.sqrt(s_k * (max_size / image_size))
mean = mean + [center_x, center_y, s_k_prime, s_k_prime]
for aspect_ratio in aspect_ratios:
mean = mean + [center_x, center_y, s_k * np.sqrt(aspect_ratio),
s_k / np.sqrt(aspect_ratio)]
mean = mean + [center_x, center_y, s_k / np.sqrt(aspect_ratio),
s_k * np.sqrt(aspect_ratio)]
output = np.asarray(mean).reshape((-1, 4))
output = np.clip(output, 0, 1)
return output
def get_configuration_file():
    """Return the default prior box configuration (SSD300 settings)."""
configuration = {'feature_map_sizes': [38, 19, 10, 5, 3, 1],
'image_size': 300,
'steps': [8, 16, 32, 64, 100, 300],
'min_sizes': [30, 60, 111, 162, 213, 264],
'max_sizes': [60, 111, 162, 213, 264, 315],
'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
'variance': [0.1, 0.2]}
return configuration
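# Illustrative usage sketch, not part of the original module: with the default
# configuration above every feature map location gets 4 or 6 prior boxes (one
# for min_size, one for sqrt(min_size * max_size) and two per aspect ratio),
# which sums to 8732 boxes for the 300 x 300 configuration.
def _example_count_prior_boxes():
    prior_boxes = create_prior_boxes()
    # prior_boxes.shape is (8732, 4)
    return prior_boxes.shape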
def apply_non_max_suppression(boxes, scores, iou_thresh=.45, top_k=200):
""" non maximum suppression in numpy
Arguments:
        boxes : array of box coordinates of shape (num_samples, 4)
        where each column corresponds to x_min, y_min, x_max, y_max
scores : array of scores given for each box in 'boxes'
iou_thresh : float intersection over union threshold for removing boxes
top_k : int Number of maximum objects per class
Returns:
selected_indices : array of integers Selected indices of kept boxes
num_selected_boxes : int Number of selected boxes
"""
selected_indices = np.zeros(shape=len(scores))
    if boxes is None or len(boxes) == 0:
        # keep the return type consistent with the non-empty case
        return selected_indices.astype(int), 0
x_min = boxes[:, 0]
y_min = boxes[:, 1]
x_max = boxes[:, 2]
y_max = boxes[:, 3]
areas = (x_max - x_min) * (y_max - y_min)
remaining_sorted_box_indices = np.argsort(scores)
remaining_sorted_box_indices = remaining_sorted_box_indices[-top_k:]
num_selected_boxes = 0
while len(remaining_sorted_box_indices) > 0:
best_score_index = remaining_sorted_box_indices[-1]
selected_indices[num_selected_boxes] = best_score_index
num_selected_boxes = num_selected_boxes + 1
if len(remaining_sorted_box_indices) == 1:
break
remaining_sorted_box_indices = remaining_sorted_box_indices[:-1]
best_x_min = x_min[best_score_index]
best_y_min = y_min[best_score_index]
best_x_max = x_max[best_score_index]
best_y_max = y_max[best_score_index]
remaining_x_min = x_min[remaining_sorted_box_indices]
remaining_y_min = y_min[remaining_sorted_box_indices]
remaining_x_max = x_max[remaining_sorted_box_indices]
remaining_y_max = y_max[remaining_sorted_box_indices]
inner_x_min = np.maximum(remaining_x_min, best_x_min)
inner_y_min = np.maximum(remaining_y_min, best_y_min)
inner_x_max = np.minimum(remaining_x_max, best_x_max)
inner_y_max = np.minimum(remaining_y_max, best_y_max)
inner_box_widths = inner_x_max - inner_x_min
inner_box_heights = inner_y_max - inner_y_min
inner_box_widths = np.maximum(inner_box_widths, 0.0)
inner_box_heights = np.maximum(inner_box_heights, 0.0)
intersections = inner_box_widths * inner_box_heights
remaining_box_areas = areas[remaining_sorted_box_indices]
best_area = areas[best_score_index]
unions = remaining_box_areas + best_area - intersections
intersec_over_union = intersections / unions
intersec_over_union_mask = intersec_over_union <= iou_thresh
remaining_sorted_box_indices = remaining_sorted_box_indices[
intersec_over_union_mask]
return selected_indices.astype(int), num_selected_boxes
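# Illustrative usage sketch, not part of the original module: three boxes where
# the second heavily overlaps the highest-scoring one and is suppressed, while
# the third survives the default IoU threshold.
def _example_non_max_suppression():
    boxes = np.array([[0.0, 0.0, 1.0, 1.0],
                      [0.0, 0.0, 0.9, 0.9],
                      [0.5, 0.5, 1.0, 1.0]])
    scores = np.array([0.9, 0.8, 0.7])
    selected_indices, num_selected_boxes = apply_non_max_suppression(
        boxes, scores, iou_thresh=.45)
    # selected_indices[:num_selected_boxes] is [0, 2]
    return selected_indices[:num_selected_boxes]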