-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdetect_test.py
263 lines (208 loc) · 9.03 KB
/
detect_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# USAGE
# python detect_realtime_tinyyolo_ncs.py --conf config/config.json \
# import the necessary packages
from openvino.inference_engine import IENetwork
from openvino.inference_engine import IEPlugin
from intel.yoloparams import TinyYOLOV3Params
from intel.tinyyolo import TinyYOLOv3
from imutils.video import VideoStream
from pyimagesearch.utils import Conf
import numpy as np
import argparse
import imutils
import time
import cv2
import os
import stt
import tts
import sys
import ttsMp3
import naver_tts
import timer as t
import socket
HOST = '192.168.43.240' # 'www.9shipcontrol.com'
PORT = 3000
# One-shot TCP file-transfer function (plain socket, not a websocket)
def sendFileToServer(filename):
    """Serve the contents of *filename* to the first client that connects.

    Despite its name, this function acts as a one-shot TCP *server*: it
    binds to (HOST, PORT), waits for a single client, streams the file to
    it in ~8 KB chunks, half-closes the connection, and tears down both
    sockets. Blocks until a client connects.

    NOTE(review): binding requires HOST to be a local address; pushing to
    a remote machine would need connect() instead of bind()/accept().
    """
    s = socket.socket()
    try:
        # Allow immediate re-bind; without this a lingering TIME_WAIT
        # socket makes a second call fail with "Address already in use".
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((HOST, PORT))
        s.listen(5)
        client, addr = s.accept()
        try:
            # `with` guarantees the file is closed even if a send fails.
            with open(filename, "rb") as f:
                content = f.read(8096)
                while content:
                    # sendall() transmits the whole chunk; bare send()
                    # may write only part of it and silently drop data.
                    client.sendall(content)
                    content = f.read(8096)
            # Half-close signals EOF to the receiver.
            client.shutdown(socket.SHUT_WR)
            print("Image sent !")
        finally:
            client.close()
    finally:
        s.close()
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True,
    help="Path to the input configuration file")
ap.add_argument("-i", "--input", help="path to the input video file")
args = vars(ap.parse_args())
# load the configuration file (project Conf helper; keys used below:
# labels_path, xml_path, bin_path, prob_threshold, iou_threshold)
conf = Conf(args["conf"])
# load the COCO class labels our YOLO model was trained on and
# initialize a list of colors to represent each possible class
# label
LABELS = open(conf["labels_path"]).read().strip().split("\n")
np.random.seed(42)  # fixed seed so box colors are stable across runs
COLORS = np.random.uniform(0, 255, size=(len(LABELS), 3))
ANSWER=["yes","no"]  # accepted spoken replies to the yes/no prompt
# initialize the plugin in for specified device (Myriad VPU / NCS stick)
plugin = IEPlugin(device="MYRIAD")
# read the IR generated by the Model Optimizer (.xml and .bin files)
print("[INFO] loading models...")
net = IENetwork(model=conf["xml_path"], weights=conf["bin_path"])
# prepare inputs
print("[INFO] preparing inputs...")
inputBlob = next(iter(net.inputs))
# set the default batch size as 1 and get the number of input blobs,
# number of channels, the height, and width of the input blob
net.batch_size = 1
(n, c, h, w) = net.inputs[inputBlob].shape
# if a video path was not supplied, grab a reference to the webcam
# load videostream to use stt.py
if args["input"] is None:
    print("[INFO] starting video stream...")
    vs = VideoStream(src=0).start()
    time.sleep(2.0)  # camera warm-up
# NOTE(review): when --input IS supplied, `vs` is never created here,
# yet the loop below reads from `vs` -- the video-file path appears to
# be missing a cv2.VideoCapture(args["input"]) branch; confirm upstream.
# loading model to the plugin and start the frames per second
# throughput estimator
print("[INFO] loading model to the plugin...")
execNet = plugin.load(network=net, num_requests=1)
TTSanswer="yes"  # default answer so the session starts
ttsMp3.pysound("./infoSound/shutdownafter3min.wav")
# Main interaction loop: each iteration handles one spoken query -- the
# user names an object, the camera searches live frames for it, and the
# result image is announced (TTS) and uploaded over a raw socket.
while True:
    ########################################################
    # timeout (15s)
    ########################################################
    # Prompt the user and capture a spoken object name via speech-to-text.
    ttsMp3.pysound("./infoSound/startInfo.wav")
    print("[INFO] STT start..")
    sttWord=stt.main()
    sttWord = sttWord.lower()
    print("[INFO] STT end..")
    detected=[]  # class labels seen so far in this detection pass
    capimg=[]    # capture image (NOTE(review): never used afterwards)
    # Re-prompt until the recognized word is one of the trained labels.
    while sttWord not in LABELS:
        ##################################################
        ttsMp3.pysound("./infoSound/notraineddata.wav")
        # ttsMp3.tts() : The item you are looking for was not understood. please tell me again
        ##################################################
        print("retry:)")
        sttWord=stt.main()
        sttWord=sttWord.lower()
        # keep the external shutdown timer alive while waiting for input
        # NOTE(review): scrape lost indentation -- this call may belong
        # AFTER the retry loop rather than inside it; confirm upstream.
        t.exit_timer("still")
    ttsMp3.pysound("./infoSound/initiateDetection.wav")
    # loop over the frames from the video stream
    while True:
        # grab the next frame and handle if we are reading from either
        # VideoCapture or VideoStream
        # Hard stop once the timer module reports 60 (units per timer.py).
        if t.time_end == 60:
            vs.stop() if args["input"] is None else vs.release()
            cv2.destroyAllWindows()
            sys.exit(0)
        orig = vs.read()
        # VideoCapture.read() returns (grabbed, frame); VideoStream
        # returns the frame directly.
        orig = orig[1] if args["input"] is not None else orig
        # if we are viewing a video and we did not grab a frame then we
        # have reached the end of the video
        if args["input"] is not None and orig is None:
            break
        # resize original frame to have a maximum width of 500 pixel and
        # input_frame to network size
        orig = imutils.resize(orig, width=500)
        frame = cv2.resize(orig, (w, h))
        # change data layout from HxWxC to CxHxW
        frame = frame.transpose((2, 0, 1))
        frame = frame.reshape((n, c, h, w))
        # start inference and initialize list to collect object detection
        # results
        output = execNet.infer({inputBlob: frame})
        objects = []
        # loop over the output items
        for (layerName, outBlob) in output.items():
            # create a new object which contains the required tinyYOLOv3
            # parameters
            layerParams = TinyYOLOV3Params(net.layers[layerName].params,
                outBlob.shape[2])
            # parse the output region
            objects += TinyYOLOv3.parse_yolo_region(outBlob,
                frame.shape[2:], orig.shape[:-1], layerParams,
                conf["prob_threshold"])
        # Non-maximum suppression: zero the confidence of any box that
        # overlaps an earlier box beyond the IoU threshold.
        for i in range(len(objects)):
            # check if the confidence of the detected object is zero, if
            # it is, then skip this iteration, indicating that the object
            # should be ignored
            if objects[i]["confidence"] == 0:
                continue
            # loop over remaining objects
            for j in range(i + 1, len(objects)):
                # check if the IoU of both the objects exceeds a
                # threshold, if it does, then set the confidence of that
                # object to zero
                if TinyYOLOv3.intersection_over_union(objects[i],
                    objects[j]) > conf["iou_threshold"]:
                    objects[j]["confidence"] = 0
        # filter objects by using the probability threshold -- if an
        # object is below the threshold, ignore it
        objects = [obj for obj in objects if obj['confidence'] >= \
            conf["prob_threshold"]]
        # store the height and width of the original frame
        (endY, endX) = orig.shape[:-1]
        # loop through all the remaining objects
        for obj in objects:
            # validate the bounding box of the detected object, ensuring
            # we don't have any invalid bounding boxes
            if obj["xmax"] > endX or obj["ymax"] > endY or obj["xmin"] \
                < 0 or obj["ymin"] < 0:
                continue
            # build a label consisting of the predicted class and
            # associated probability
            label = "{}: {:.2f}%".format(LABELS[obj["class_id"]],
                obj["confidence"] * 100)
            # calculate the y-coordinate used to write the label on the
            # frame depending on the bounding box coordinate
            y = obj["ymin"] - 15 if obj["ymin"] - 15 > 15 else \
                obj["ymin"] + 15
            # draw a bounding box rectangle and label on the frame
            cv2.rectangle(orig, (obj["xmin"], obj["ymin"]), (obj["xmax"],
                obj["ymax"]), COLORS[obj["class_id"]], 2)
            cv2.putText(orig, label, (obj["xmin"], y),
                cv2.FONT_HERSHEY_SIMPLEX, 1, COLORS[obj["class_id"]], 3)
            # record each class label the first time it is seen
            if LABELS[obj["class_id"]] in detected:
                pass
            else:
                detected.append(LABELS[obj["class_id"]])
        cv2.imshow("TinyYOLOv3", orig)
        key = cv2.waitKey(1) & 0xFF  # NOTE(review): key is never checked
        # Target found: save the annotated frame, record the word, upload
        # the image, announce via TTS, and stop scanning frames.
        if sttWord in detected:
            cv2.imwrite("./result.jpg", orig)
            f = open('text.txt', mode='wt', encoding='utf-8')
            f.write(sttWord)
            # Transmit the image to AWS
            sendFileToServer("result.jpg")
            f.close()
            naver_tts.tts(sttWord)
            break
    # Ask whether the user wants to search for another object.
    ttsMp3.pysound("./infoSound/morefind.wav")
    print("[INFO] STT start..")
    TTSanswer=stt.main()
    print("[INFO] STT end..")
    # NOTE(review): this first answer is not lower()ed before the check,
    # so a reply of "Yes"/"No" always triggers at least one re-prompt.
    while TTSanswer not in ANSWER:
        ttsMp3.pysound("./infoSound/yesorno.wav")
        TTSanswer=stt.main()
        TTSanswer=TTSanswer.lower()
    if TTSanswer == "no":
        ttsMp3.pysound("./infoSound/closetheapp.wav")
        print("[INFO] Application Exit")
        t.exit_timer("close")
        break
# stop the video stream and close any open windows
vs.stop() if args["input"] is None else vs.release()
cv2.destroyAllWindows()