detect_video.py
# USAGE
# python detect_video.py --model mobilenet_ssd_v2/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite --labels mobilenet_ssd_v2/coco_labels.txt
from __future__ import print_function
from edgetpu.detection.engine import DetectionEngine
from imutils.video import VideoStream
import imutils
from PIL import Image
import argparse
import time
import cv2
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", required=True,
help="path to TensorFlow Lite object detection model")
ap.add_argument("-l", "--labels", required=True,
help="path to labels file")
ap.add_argument("-c", "--confidence", type=float, default=0.3,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# initialize the labels dictionary
print("[INFO] parsing class labels...")
labels = {}
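# bookkeeping for the playback FPS counter and the per-inference latency readout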
fps = ""
detectfps = ""
framecount = 0
detectframecount = 0
time1 = 0
time2 = 0
# loop over the class labels file
for row in open(args["labels"]):
    # unpack the row and update the labels dictionary
    (classID, label) = row.strip().split(maxsplit=1)
    labels[int(classID)] = label.strip()
# load the Google Coral object detection model
print("[INFO] loading Coral model...")
model = DetectionEngine(args["model"])
# initialize the video stream and allow the camera sensor to warmup
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
#vs = VideoStream(usePiCamera=False).start()
time.sleep(2.0)
# loop over the frames from the video stream
while True:
    t1 = time.perf_counter()

    # grab the frame from the threaded video stream (the optional
    # resize is left commented out here)
    frame = vs.read()
    #frame = imutils.resize(frame, width=300, height=300)
    orig = frame.copy()

    # prepare the frame for object detection by converting it (1)
    # from BGR to RGB channel ordering and then (2) from a NumPy
    # array to PIL Image format
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = Image.fromarray(frame)

    # refresh the playback FPS and detection latency readouts once
    # every 15 frames
    framecount += 1
    if framecount >= 15:
        fps = "(Playback) {:.1f} FPS".format(time1 / 15)
        #detectfps = "(Detection) {:.1f} FPS".format(detectframecount/time2)
        detectfps = "(Detection) {:.1f} ms".format((end - start) * 1000)
        framecount = 0
        detectframecount = 0
        time1 = 0
        time2 = 0
        print(fps + " " + detectfps)

    # accumulate per-frame timing for the playback FPS estimate
    t2 = time.perf_counter()
    elapsedTime = t2 - t1
    time1 += 1 / elapsedTime
    time2 += elapsedTime

    # make predictions on the input frame
    start = time.time()
    #detectframecount += 1
    results = model.DetectWithImage(frame, threshold=args["confidence"],
        keep_aspect_ratio=True, relative_coord=False)
    end = time.time()

    # loop over the results
    for r in results:
        # extract the bounding box and predicted class label
        box = r.bounding_box.flatten().astype("int")
        (startX, startY, endX, endY) = box
        label = labels[r.label_id]

        # draw the bounding box and label on the image
        cv2.rectangle(orig, (startX, startY), (endX, endY),
            (0, 255, 0), 2)
        y = startY - 15 if startY - 15 > 15 else startY + 15
        text = "{}: {:.2f}%".format(label, r.score * 100)
        cv2.putText(orig, text, (startX, y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # show the output frame and wait for a key press
    cv2.imshow("Frame", orig)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break
# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()