utils_vid.py
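"""Video face-cropping utilities: read a video with skvideo, detect a face with
face_alignment, enlarge and square the face box, correct tracked boxes towards
fresh detections, and crop frames to 224x224."""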
import skvideo.io
import cv2
import face_alignment
import numpy as np
def load_reader(vid_path):
    # Open the video and return the reader together with its shape.
    reader = skvideo.io.FFmpegReader(vid_path)
    video_shape = reader.getShape()
    (num_frames, h, w, c) = video_shape  # (frames, height, width, channels)
    return reader, video_shape
def load_detector(device):
    # 2D landmark model with the S3FD face detector; flip_input disabled for speed.
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device,
                                      flip_input=False, face_detector='sfd')  # fa_probs_threshold = 0.95
    return fa
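# Note: fa.get_landmarks(image) returns a list of (68, 2) landmark arrays, or
# None when no face is found. The helpers below expect a face box in the
# (xmin, ymin, width, height) format that enlarge_box unpacks.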
def enlarge_box(box, enlarge_ratio, video_shape):
    (num_frames, h, w, c) = video_shape
    (xmin, ymin, width, height) = [int(v) for v in box]
    ## -- enlarge around the box centre
    center_y = ymin + int(height / 2)
    center_x = xmin + int(width / 2)
    ## -- make the box square
    one_side = max(width, height)
    half_one_side = int(one_side * (1 + enlarge_ratio) / 2)
    ## -- shrink the half-side so the square fits inside the frame
    ## (use the actual frame size from video_shape rather than assuming 1920x1080)
    y_margin = min(abs(center_y), abs(h - center_y))
    x_margin = min(abs(center_x), abs(w - center_x))
    margin = min(y_margin, x_margin)
    if margin < half_one_side:
        half_one_side = margin
    ymin_boundary = int(center_y - half_one_side)
    ymax_boundary = int(center_y + half_one_side)
    xmin_boundary = int(center_x - half_one_side)
    xmax_boundary = int(center_x + half_one_side)
    ## -- clamp to the frame
    if ymin_boundary < 0:
        ymin_boundary = 0
    if xmin_boundary < 0:
        xmin_boundary = 0
    if xmax_boundary > int(w) - 1:
        xmax_boundary = int(w) - 1
    if ymax_boundary > int(h) - 1:
        ymax_boundary = int(h) - 1
    enlarged_square_box = [ymin_boundary, xmin_boundary, ymax_boundary, xmax_boundary]  # [y:y+h, x:x+w]
    return enlarged_square_box
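# Worked example (made-up numbers): for a 1920x1080 video with
# box = (100, 50, 80, 60) and enlarge_ratio = 0.5:
#   center = (x=140, y=80), one_side = 80, half_one_side = int(80 * 1.5 / 2) = 60,
#   the square stays inside the frame, so the result is
#   [ymin, xmin, ymax, xmax] = [20, 80, 140, 200].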
def get_corrected_boxes(input_boxes, shift_frame_num, video_shape):
    (num_frames, h, w, c) = video_shape
    # Per-frame offset between the last detection (input_boxes[-1]) and the
    # last tracked box (input_boxes[-2]), spread over shift_frame_num frames.
    rate_of_change = (np.array(input_boxes[-1]) - np.array(input_boxes[-2])) / shift_frame_num
    # Linearly interpolate the tracked boxes towards the detection.
    corrected_boxes = np.int64([np.array(input_boxes[j]) + rate_of_change * j for j in range(shift_frame_num)])
    corrected_boxes = corrected_boxes.tolist()
    ## -- some components may fall outside the frame after the correction
    positive_boxes = []
    for box in corrected_boxes:
        [left, top, right, bottom] = box  # [ymin, xmin, ymax, xmax]
        if left < 0:
            left = 0
        if top < 0:
            top = 0
        if bottom > int(w) - 1:
            bottom = int(w) - 1
        if right > int(h) - 1:
            right = int(h) - 1
        positive_boxes.append([left, top, right, bottom])
    positive_boxes.append(input_boxes[-1])  # keep the detection itself as the last box
    return positive_boxes
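# Worked example (made-up numbers): with shift_frame_num = 2 and
# input_boxes = [[0, 0, 10, 10], [0, 0, 10, 10], [4, 4, 14, 14]]
# (two tracked boxes followed by a fresh detection), the per-frame offset is
# [2, 2, 2, 2], so the output is [[0, 0, 10, 10], [2, 2, 12, 12], [4, 4, 14, 14]]:
# the tracked boxes drift linearly towards the detection, which is kept as-is.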
def crop_video(frame_list, corrected_boxes):
    cropped_frame_list = []
    for i, frame in enumerate(frame_list):
        [left, top, right, bottom] = corrected_boxes[i]  # [ymin, xmin, ymax, xmax]
        cropped_img = frame[int(left):int(right), int(top):int(bottom)]
        cropped_img = cv2.resize(cropped_img, (224, 224), interpolation=cv2.INTER_LINEAR)
        cropped_img = cv2.cvtColor(cropped_img, cv2.COLOR_RGB2BGR)  # skvideo yields RGB; OpenCV expects BGR
        cropped_frame_list.append(cropped_img)
    return cropped_frame_list
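if __name__ == "__main__":
    # Minimal usage sketch, not part of the original pipeline: "example.mp4",
    # the CPU device, and the 0.5 enlarge ratio are placeholder values, and the
    # landmark-to-box conversion is just one way to build the
    # (xmin, ymin, width, height) box that enlarge_box expects.
    reader, video_shape = load_reader("example.mp4")
    fa = load_detector("cpu")
    frames = [frame for frame in reader.nextFrame()]  # skvideo yields RGB frames

    landmarks = fa.get_landmarks(frames[0])
    if landmarks is not None:
        pts = landmarks[0]
        xmin, ymin = pts.min(axis=0)
        box = (xmin, ymin, pts[:, 0].max() - xmin, pts[:, 1].max() - ymin)
        square_box = enlarge_box(box, enlarge_ratio=0.5, video_shape=video_shape)
        crops = crop_video(frames[:1], [square_box])
        cv2.imwrite("crop_example.png", crops[0])  # crop is already BGR for OpenCV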