-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathyolo.py
191 lines (154 loc) · 7.19 KB
/
yolo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
"""YOLO_v2 Model Defined in Keras."""
"""from https://github.com/allanzelener/YAD2K """
import time
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Lambda
from keras.layers.merge import concatenate
from keras.models import Model
from darknet19 import (DarknetConv2D, DarknetConv2D_BN_Leaky,
darknet_body)
from utils import compose
def space_to_depth_x2(x):
"""Thin wrapper for Tensorflow space_to_depth with block_size=2."""
# Import currently required to make Lambda work.
# See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
import tensorflow as tf
return tf.space_to_depth(x, block_size=2)
def space_to_depth_x2_output_shape(input_shape):
"""Determine space_to_depth output shape for block_size=2.
Note: For Lambda with TensorFlow backend, output shape may not be needed.
"""
return (input_shape[0], input_shape[1] // 2, input_shape[2] // 2, 4 *
input_shape[3]) if input_shape[1] else (input_shape[0], None, None,
4 * input_shape[3])
def yolo_body(inputs, num_anchors, num_classes):
"""Create YOLO_V2 model CNN body in Keras."""
darknet = Model(inputs, darknet_body()(inputs))
conv20 = compose(
DarknetConv2D_BN_Leaky(1024, (3, 3)),
DarknetConv2D_BN_Leaky(1024, (3, 3)))(darknet.output)
conv13 = darknet.layers[43].output
conv21 = DarknetConv2D_BN_Leaky(64, (1, 1))(conv13)
# TODO: Allow Keras Lambda to use func arguments for output_shape?
conv21_reshaped = Lambda(
space_to_depth_x2,
output_shape=space_to_depth_x2_output_shape,
name='space_to_depth')(conv21)
x = concatenate([conv21_reshaped, conv20])
x = DarknetConv2D_BN_Leaky(1024, (3, 3))(x)
x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x)
return Model(inputs, x)
def yolo_head(feats, anchors, num_classes):
"""Convert final layer features to bounding box parameters.
Parameters
----------
feats : tensor
Final convolutional layer features.
anchors : array-like
Anchor box widths and heights.
num_classes : int
Number of target classes.
Returns
-------
box_xy : tensor
x, y box predictions adjusted by spatial location in conv layer.
box_wh : tensor
w, h box predictions adjusted by anchors and conv spatial resolution.
box_conf : tensor
Probability estimate for whether each box contains any object.
box_class_pred : tensor
Probability distribution estimate for each box over class labels.
"""
num_anchors = len(anchors)
# Reshape to batch, height, width, num_anchors, box_params.
anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])
# Static implementation for fixed models.
# TODO: Remove or add option for static implementation.
# _, conv_height, conv_width, _ = K.int_shape(feats)
# conv_dims = K.variable([conv_width, conv_height])
# Dynamic implementation of conv dims for fully convolutional model.
conv_dims = K.shape(feats)[1:3] # assuming channels last
# In YOLO the height index is the inner most iteration.
conv_height_index = K.arange(0, stop=conv_dims[0])
conv_width_index = K.arange(0, stop=conv_dims[1])
conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
# TODO: Repeat_elements and tf.split doesn't support dynamic splits.
# conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
conv_width_index = K.tile(
K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
conv_width_index = K.flatten(K.transpose(conv_width_index))
conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
conv_index = K.cast(conv_index, K.dtype(feats))
feats = K.reshape(
feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))
# Static generation of conv_index:
# conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
# conv_index = conv_index[:, [1, 0]] # swap columns for YOLO ordering.
# conv_index = K.variable(
# conv_index.reshape(1, conv_height, conv_width, 1, 2))
# feats = Reshape(
# (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)
box_xy = K.sigmoid(feats[..., :2])
box_wh = K.exp(feats[..., 2:4])
box_confidence = K.sigmoid(feats[..., 4:5])
box_class_probs = K.softmax(feats[..., 5:])
# Adjust preditions to each spatial grid point and anchor size.
# Note: YOLO iterates over height index before width index.
box_xy = (box_xy + conv_index) / conv_dims
box_wh = box_wh * anchors_tensor / conv_dims
return box_xy, box_wh, box_confidence, box_class_probs
def yolo_boxes_to_corners(box_xy, box_wh):
"""Convert YOLO box predictions to bounding box corners."""
box_mins = box_xy - (box_wh / 2.)
box_maxes = box_xy + (box_wh / 2.)
return K.concatenate([
box_mins[..., 1:2], # y_min
box_mins[..., 0:1], # x_min
box_maxes[..., 1:2], # y_max
box_maxes[..., 0:1] # x_max
])
def yolo(inputs, anchors, num_classes):
"""Generate a complete YOLO_v2 localization model."""
num_anchors = len(anchors)
body = yolo_body(inputs, num_anchors, num_classes)
outputs = yolo_head(body.output, anchors, num_classes)
return outputs
def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=.6):
"""Filter YOLO boxes based on object and class confidence."""
box_scores = box_confidence * box_class_probs
box_classes = K.argmax(box_scores, axis=-1)
box_class_scores = K.max(box_scores, axis=-1)
prediction_mask = box_class_scores >= threshold
boxes = tf.boolean_mask(boxes, prediction_mask)
scores = tf.boolean_mask(box_class_scores, prediction_mask)
classes = tf.boolean_mask(box_classes, prediction_mask)
return boxes, scores, classes
def yolo_eval(yolo_outputs,
image_shape,
max_boxes=10,
score_threshold=.6,
iou_threshold=.5):
"""Evaluate YOLO model on given input batch and return filtered boxes."""
box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs
boxes = yolo_boxes_to_corners(box_xy, box_wh)
boxes, scores, classes = yolo_filter_boxes(
boxes, box_confidence, box_class_probs, threshold=score_threshold)
# Scale boxes back to original image shape.
height = image_shape[0]
width = image_shape[1]
image_dims = K.stack([height, width, height, width])
image_dims = K.reshape(image_dims, [1, 4])
boxes = boxes * image_dims
# TODO: Something must be done about this ugly hack!
max_boxes_tensor = K.variable(max_boxes, dtype='int32')
K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
nms_index = tf.image.non_max_suppression(
boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
boxes = K.gather(boxes, nms_index)
scores = K.gather(scores, nms_index)
classes = K.gather(classes, nms_index)
return boxes, scores, classes