-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_parse.py
80 lines (63 loc) · 2.79 KB
/
data_parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import facenet
import numpy as np
def split_dataset(dataset, min_nrof_images_per_class, nrof_test_images_per_class):
    """
    split_dataset - function to split the dataset into a train set and a test set

    Classes with fewer than min_nrof_images_per_class images are skipped. For every
    remaining class the image paths are shuffled with np.random.shuffle, the first
    nrof_test_images_per_class paths go to the test set and the remaining paths go
    to the train set.

    args        dataset - iterable of class objects exposing .name and .image_paths
                min_nrof_images_per_class - minimum num of images required for a class to be used
                nrof_test_images_per_class - num of images held out for testing within a class
    returns     train_set - list of facenet.ImageClass holding the training paths
                test_set - list of facenet.ImageClass holding the test paths
                num_classes - number of classes that were kept

    NOTE: if nrof_test_images_per_class is >= the number of images in a class, the
    train entry for that class has an empty path list.
    """
    train_set = []
    test_set = []
    for cls in dataset:
        # Remove classes with less than min_nrof_images_per_class
        if len(cls.image_paths) < min_nrof_images_per_class:
            continue
        # Shuffle a copy: np.random.shuffle works in place and would otherwise
        # reorder the caller's cls.image_paths as a hidden side effect.
        paths = list(cls.image_paths)
        np.random.shuffle(paths)
        test_set.append(facenet.ImageClass(cls.name, paths[:nrof_test_images_per_class]))
        train_set.append(facenet.ImageClass(cls.name, paths[nrof_test_images_per_class:]))
    num_classes = len(test_set)
    print('Classes (Number of Faces): %d' % num_classes)
    return train_set, test_set, num_classes
def labels_to_int(labels):
    """
    labels_to_int - function to convert labels to ints (the original author's
                    stated motivation: the svm is fed int labels rather than strings)

    Every distinct name is given one int, starting at 0 and increasing in the order
    in which names are first seen. The returned array and both dictionaries always
    agree with each other, so a value from int_labels can be looked up directly in
    int_label_dict_reverse.

    args        labels - iterable of hashable labels (e.g. names) to be converted
    returns     int_labels - numpy int array, one entry per input label
                int_label_dict - dictionary for easy lookup of name to int
                int_label_dict_reverse - dictionary for easy lookup of int to name

    An empty input yields an empty array and two empty dictionaries.
    """
    int_labels = []
    int_label_dict = {}
    int_label_dict_reverse = {}
    for name in labels:
        if name not in int_label_dict:
            # First time this name is seen: hand out the next free int.
            next_int = len(int_label_dict)
            int_label_dict[name] = next_int
            int_label_dict_reverse[next_int] = name
        # Always read the int back from the dict so the array can never drift
        # away from the mapping returned to the caller.
        int_labels.append(int_label_dict[name])
    # dtype is given explicitly so an empty input still produces an int array.
    int_labels = np.asarray(int_labels, dtype=int)
    # returns the label array in ints and dictionaries to easily look up what number belongs to what person
    return int_labels, int_label_dict, int_label_dict_reverse
def int_label_lookup(test_labels, int_label_dict):
    """
    int_label_lookup - translate test labels into their int form.
                       The test data may arrive in any order, so each label is
                       resolved individually through the name-to-int dictionary
                       to keep labels and ints correctly paired.

    args        test_labels - iterable of labels used for testing
                int_label_dict - dictionary mapping each label to its int
    return      int_labels - list of ints, in the same order as test_labels

    A label that is missing from int_label_dict raises KeyError.
    """
    return [int_label_dict[label] for label in test_labels]