Skip to content

Commit

Permalink
add flow module
Browse files Browse the repository at this point in the history
  • Loading branch information
turingyizhu committed Jun 28, 2017
1 parent 6794bd5 commit 5ca01a1
Show file tree
Hide file tree
Showing 8 changed files with 14,012 additions and 75 deletions.
177 changes: 137 additions & 40 deletions datasets/ucf101.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,20 @@
import numpy as np
import cv2

# Filename suffixes accepted as frame images. Both lower- and upper-case
# variants are listed because the endswith() match in is_image_file is
# case-sensitive.
IMG_EXTENSIONS = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]

def is_image_file(filename):
    """Return True if `filename` ends with a known image extension.

    Matching is case-sensitive; IMG_EXTENSIONS lists both cases explicitly.
    """
    # str.endswith accepts a tuple of suffixes, replacing the per-extension
    # Python-level loop with a single C-level call.
    return filename.endswith(tuple(IMG_EXTENSIONS))

def find_classes(dir):
    """List the class subdirectories of `dir` and map each name to an index.

    Every immediate subdirectory of `dir` is treated as one class. Names are
    sorted alphabetically so the name -> index assignment is stable across
    runs and machines.

    Returns:
        (classes, class_to_idx): the sorted list of class names and a dict
        mapping each class name to its integer label.
    """
    classes = sorted(
        d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))
    )
    # enumerate() is the idiomatic replacement for indexing with range(len()).
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    return classes, class_to_idx

def cv2_loader(path):
    """Read the image at `path` with OpenCV.

    Returns the image as a BGR ndarray, or None when the file cannot be
    read — cv2.imread signals failure by returning None, not by raising.
    """
    return cv2.imread(path)

def read_split_file(root, split_file):
def make_dataset(root, source):

if not os.path.exists(split_file):
print("Split file for ucf101 dataset doesn't exist.")
if not os.path.exists(source):
print("Setting file %s for ucf101 dataset doesn't exist." % (source))
sys.exit()
else:
clips = []
with open(split_file) as split_f:
with open(source) as split_f:
data = split_f.readlines()
for line in data:
line_info = line.split()
Expand All @@ -40,53 +29,160 @@ def read_split_file(root, split_file):
item = (clip_path, duration, target)
clips.append(item)
return clips


def ReadSegmentRGB(path, offsets, new_height, new_width, new_length, is_color, name_pattern):
    """Load `new_length` consecutive frames at each sampled offset.

    Args:
        path: directory holding the extracted frames of one video clip.
        offsets: 0-based starting frame offsets, one per segment.
        new_height, new_width: target size; no resize when either is <= 0.
        new_length: number of consecutive frames read per offset.
        is_color: read 3-channel color (converted BGR -> RGB) when True,
            single-channel grayscale when False.
        name_pattern: printf-style pattern mapping a 1-based frame index to a
            filename, e.g. "image_%04d.jpg".

    Returns:
        np.ndarray of shape (H, W, num_offsets * new_length * channels) —
        all frames stacked along the channel axis.

    Exits the process when a frame file cannot be read.
    """
    if is_color:
        cv_read_flag = cv2.IMREAD_COLOR  # > 0
    else:
        cv_read_flag = cv2.IMREAD_GRAYSCALE  # = 0
    interpolation = cv2.INTER_LINEAR

    sampled_list = []
    for offset in offsets:
        for length_id in range(1, new_length + 1):
            frame_name = name_pattern % (length_id + offset)
            frame_path = os.path.join(path, frame_name)
            cv_img_origin = cv2.imread(frame_path, cv_read_flag)
            if cv_img_origin is None:
                print("Could not load file %s" % (frame_path))
                sys.exit()
                # TODO: error handling here
            if new_width > 0 and new_height > 0:
                # BUG FIX: interpolation must be passed by keyword — the third
                # positional argument of cv2.resize is `dst`, not interpolation.
                cv_img = cv2.resize(cv_img_origin, (new_width, new_height),
                                    interpolation=interpolation)
            else:
                cv_img = cv_img_origin
            if is_color:
                # OpenCV decodes to BGR; downstream transforms expect RGB.
                cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
            else:
                # BUG FIX: grayscale frames are 2-D; add a channel axis so the
                # concatenate along axis=2 below does not fail.
                cv_img = np.expand_dims(cv_img, 2)
            sampled_list.append(cv_img)
    clip_input = np.concatenate(sampled_list, axis=2)
    return clip_input

def ReadSegmentFlow(path, offsets, new_height, new_width, new_length, is_color, name_pattern):
    """Load `new_length` consecutive optical-flow (x, y) frame pairs per offset.

    Args:
        path: directory holding the extracted flow images of one video clip.
        offsets: 0-based starting frame offsets, one per segment.
        new_height, new_width: target size; no resize when either is <= 0.
        new_length: number of consecutive flow pairs read per offset.
        is_color: imread flag selector; flow images are normally grayscale,
            so this is expected to be False — TODO confirm with callers.
        name_pattern: printf-style pattern taking the axis ("x"/"y") and a
            1-based frame index, e.g. "flow_%s_%04d.jpg".

    Returns:
        np.ndarray of shape (H, W, num_offsets * new_length * 2) with the x
        and y flow channels interleaved along the channel axis.

    Exits the process when a flow file cannot be read.
    """
    if is_color:
        cv_read_flag = cv2.IMREAD_COLOR  # > 0
    else:
        cv_read_flag = cv2.IMREAD_GRAYSCALE  # = 0
    interpolation = cv2.INTER_LINEAR

    sampled_list = []
    for offset in offsets:
        for length_id in range(1, new_length + 1):
            frame_name_x = name_pattern % ("x", length_id + offset)
            frame_path_x = os.path.join(path, frame_name_x)
            cv_img_origin_x = cv2.imread(frame_path_x, cv_read_flag)
            frame_name_y = name_pattern % ("y", length_id + offset)
            frame_path_y = os.path.join(path, frame_name_y)
            cv_img_origin_y = cv2.imread(frame_path_y, cv_read_flag)
            if cv_img_origin_x is None or cv_img_origin_y is None:
                print("Could not load file %s or %s" % (frame_path_x, frame_path_y))
                sys.exit()
                # TODO: error handling here
            if new_width > 0 and new_height > 0:
                # BUG FIX: interpolation must be passed by keyword — the third
                # positional argument of cv2.resize is `dst`, not interpolation.
                cv_img_x = cv2.resize(cv_img_origin_x, (new_width, new_height),
                                      interpolation=interpolation)
                cv_img_y = cv2.resize(cv_img_origin_y, (new_width, new_height),
                                      interpolation=interpolation)
            else:
                cv_img_x = cv_img_origin_x
                cv_img_y = cv_img_origin_y
            # Grayscale images are 2-D; give each a channel axis for concatenate.
            sampled_list.append(np.expand_dims(cv_img_x, 2))
            sampled_list.append(np.expand_dims(cv_img_y, 2))

    clip_input = np.concatenate(sampled_list, axis=2)
    return clip_input


class ucf101(data.Dataset):

def __init__(self, root, split_file, phase, new_length=1, transform=None, target_transform=None,
video_transform=None, loader=cv2_loader):
def __init__(self,
root,
source,
phase,
modality,
name_pattern=None,
is_color=True,
num_segments=1,
new_length=1,
new_width=0,
new_height=0,
transform=None,
target_transform=None,
video_transform=None):

classes, class_to_idx = find_classes(root)
clips = read_split_file(root, split_file)
clips = make_dataset(root, source)

if len(clips) == 0:
raise(RuntimeError("Found 0 video clips in subfolders of: " + root + "\n"
"Check your data directory."))

self.root = root
self.split_file = split_file
self.source = source
self.phase = phase
self.clips = clips
self.modality = modality

self.classes = classes
self.class_to_idx = class_to_idx
self.clips = clips

if name_pattern:
self.name_pattern = name_pattern
else:
if self.modality == "rgb":
self.name_pattern = "image_%04d.jpg"
elif self.modality == "flow":
self.name_pattern = "flow_%s_%04d.jpg"

self.is_color = is_color
self.num_segments = num_segments
self.new_length = new_length
self.new_width = new_width
self.new_height = new_height

self.transform = transform
self.target_transform = target_transform
self.video_transform = video_transform
self.loader = loader

def __getitem__(self, index):
path, duration, target = self.clips[index]
frame_list = os.listdir(path)
frame_list.sort()
if self.phase == "train":
sampled_frameID = random.randint(0, duration-self.new_length)
elif self.phase == "val":
if duration >= self.new_length:
sampled_frameID = int((duration - self.new_length + 1)/2)
average_duration = int(duration / self.num_segments)
offsets = []
for seg_id in range(self.num_segments):
if self.phase == "train":
if average_duration >= self.new_length:
offset = random.randint(0, average_duration - self.new_length)
# No +1 because randint(a,b) return a random integer N such that a <= N <= b.
offsets.append(offset + seg_id * average_duration)
else:
offsets.append(0)
elif self.phase == "val":
if average_duration >= self.new_length:
offsets.append(int((average_duration - self.new_length + 1)/2 + seg_id * average_duration))
else:
offsets.append(0)
else:
sampled_frameID = 0
print("Only phase train and val are supported.")


if self.modality == "rgb":
clip_input = ReadSegmentRGB(path,
offsets,
self.new_height,
self.new_width,
self.new_length,
self.is_color,
self.name_pattern
)
elif self.modality == "flow":
clip_input = ReadSegmentFlow(path,
offsets,
self.new_height,
self.new_width,
self.new_length,
self.is_color,
self.name_pattern
)
else:
print("No such phase. Only train and val are supported.")

sampled_list = []
for frame_id in range(self.new_length):
fname = os.path.join(path, frame_list[sampled_frameID+frame_id])
if is_image_file(fname):
img = self.loader(fname)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
sampled_list.append(img)
clip_input = np.concatenate(sampled_list, axis=2)
print("No such modality %s" % (self.modality))

if self.transform is not None:
clip_input = self.transform(clip_input)
Expand All @@ -97,5 +193,6 @@ def __getitem__(self, index):

return clip_input, target


    def __len__(self):
        """Return the number of video clips listed in the source/split file."""
        return len(self.clips)
4 changes: 3 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@
help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=50, type=int,
# BUG FIX: help texts said "(default: 50)" / "(default: 8)" while the actual
# defaults are 32 and 4 — keep the user-facing text in sync with the code.
parser.add_argument('-b', '--batch-size', default=32, type=int,
                    metavar='N', help='mini-batch size (default: 32)')
parser.add_argument('--iter-size', default=4, type=int,
                    metavar='I', help='iter size as in Caffe to reduce memory usage (default: 4)')
parser.add_argument('--new_length', default=1, type=int,
metavar='N', help='length of sampled video frames (default: 1)')
parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
Expand Down
Loading

0 comments on commit 5ca01a1

Please sign in to comment.