-
Notifications
You must be signed in to change notification settings - Fork 1
/
yolov5_letterbox_simple.py
148 lines (116 loc) · 4.5 KB
/
yolov5_letterbox_simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import cv2
import numpy as np
# Constants.
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.5
NMS_THRESHOLD = 0.45
CONFIDENCE_THRESHOLD = 0.45
# Text parameters.
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1
# Colors
BLACK = (0,0,0)
BLUE = (255,178,50)
YELLOW = (0,255,255)
RED = (0,0,255)
def draw_label(input_image, label, left, top):
"""Draw text onto image at location."""
# Get text size.
text_size = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)
dim, baseline = text_size[0], text_size[1]
# Use text size to create a BLACK rectangle.
cv2.rectangle(input_image, (left, top), (left + dim[0], top + dim[1] + baseline), BLACK, cv2.FILLED);
# Display text inside the rectangle.
cv2.putText(input_image, label, (left, top + dim[1]), FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)
def format_yolov5(source):
row, col = source.shape[:2]
_max = max(col, row)
# Create a blank square image with the same type as the source image
result = np.zeros((_max, _max, 3), dtype=source.dtype)
# Copy the original image to the top-left corner of the result image
result[0:row, 0:col] = source
return result
def pre_process(input_image, net):
input_image=format_yolov5(input_image)
# Create a 4D blob from a frame.
blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WIDTH, INPUT_HEIGHT), [0,0,0], 1, crop=False)
# Sets the input to the network.
net.setInput(blob)
# Runs the forward pass to get output of the output layers.
output_layers = net.getUnconnectedOutLayersNames()
outputs = net.forward(output_layers)
return outputs
def post_process(input_image, outputs):
# Lists to hold respective values while unwrapping.
class_ids = []
confidences = []
boxes = []
# Rows.
rows = outputs[0].shape[1]
image_height, image_width = input_image.shape[:2]
# Select max value, because we already put original picture in black sqaure
_max = max(image_width, image_height)
# Resizing factor.
x_factor = _max / INPUT_WIDTH
y_factor = _max / INPUT_HEIGHT
# Iterate through 25200 detections.
for r in range(rows):
row = outputs[0][0][r]
confidence = row[4]
# Discard bad detections and continue.
if confidence >= CONFIDENCE_THRESHOLD:
classes_scores = row[5:]
# Get the index of max class score.
class_id = np.argmax(classes_scores)
# Continue if the class score is above threshold.
if (classes_scores[class_id] > SCORE_THRESHOLD):
confidences.append(confidence)
class_ids.append(class_id)
cx, cy, w, h = row[0], row[1], row[2], row[3]
left = int((cx - w/2) * x_factor)
top = int((cy - h/2) * y_factor)
width = int(w * x_factor)
height = int(h * y_factor)
box = np.array([left, top, width, height])
boxes.append(box)
# Perform non maximum suppression to eliminate redundant overlapping boxes with
# lower confidences.
indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
for i in indices:
box = boxes[i]
left = box[0]
top = box[1]
width = box[2]
height = box[3]
cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
label = "{}:{:.2f}".format(classes[class_ids[i]], confidences[i])
draw_label(input_image, label, left, top)
return input_image
if __name__ == '__main__':
# Load class names.
classesFile = "coco/classes.txt"
classes = None
with open(classesFile, 'rt') as f:
classes = f.read().rstrip('\n').split('\n')
# Load image.
frame = cv2.imread('sample.jpg')
# Give the weight files to the model and load the network using them.
modelWeights = "weights/yolov5s.onnx"
net = cv2.dnn.readNet(modelWeights)
# Process image.
detections = pre_process(frame, net)
# print(np.array(detections).shape)
img = post_process(frame.copy(), detections)
# Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
print(label)
cv2.putText(img, label, (20, 40), FONT_FACE, FONT_SCALE, RED, THICKNESS, cv2.LINE_AA)
# cv2.imshow('Output', img)
# cv2.waitKey(0)
# Save the output image instead of displaying it, because now using headless opencv!
output_file = 'output_py_letterbox.jpg'
cv2.imwrite(output_file, img)
print(f"Output saved as {output_file}")