-
Notifications
You must be signed in to change notification settings - Fork 0
/
WISDM_multi_hybrid.py
174 lines (152 loc) · 6.79 KB
/
WISDM_multi_hybrid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# multichannel cnn lstm model
import numpy as np
# set the seeds for consistency
seed = 1
np.random.seed(seed)# set the numpy seed before importing keras
import random
random.seed(seed) #set the build-in seed
import tensorflow as tf
tf.random.set_seed(seed) # set the seed for tf
import pandas as pd
from keras.utils.vis_utils import plot_model
from timeit import default_timer as timer
from sklearn.utils import class_weight
from plot_confusion_matrix import cm_analysis
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from numpy import dstack
from keras.models import Model
from keras.layers import Dense
from keras.layers import Input
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers import concatenate
from keras.utils.vis_utils import plot_model
from keras.utils import np_utils
from matplotlib import pyplot as plt
# load the labels file as a numpy array
def load_file(filepath):
    """Load a text file of labels as a numpy array of strings.

    Args:
        filepath: Path of the text file handed to ``np.loadtxt``.

    Returns:
        A numpy array of ``str`` values with at least one dimension.
    """
    # ndmin=1 keeps a file that holds a single label from collapsing into a
    # 0-d array, which has no length and cannot be iterated or indexed.
    return np.loadtxt(filepath, dtype=str, ndmin=1)
# load a list of files and return as a 3d numpy array
def load_group(filenames, prefix=''):
    """Read several numeric text files and stack them along a third axis.

    Args:
        filenames: Iterable of file names; each is read from ``prefix + name``.
        prefix: String prepended to every file name (e.g. a directory path
            ending in a separator).

    Returns:
        The ``dstack`` of the per-file 2-D arrays, so the data of the k-th
        file sits at index k of the last axis.
    """
    # ndmin=2 guarantees every file contributes a 2-D array, even one row
    per_file = [np.loadtxt(prefix + name, ndmin=2) for name in filenames]
    return dstack(per_file)
#load a dataset group, such as train or test
def load_dataset_group(group, prefix = ''):
    """Load the three acceleration files and the label file of one group.

    Args:
        group: Group name used as the file-name stem (the script uses
            'train' and 'test').
        prefix: String prepended to every file name.

    Returns:
        A tuple ``(X, y)``: ``X`` is the result of ``load_group`` over the
        x/y/z acceleration files, ``y`` is the result of ``load_file`` on
        ``prefix + group + 'y.txt'``.
    """
    # one file per acceleration axis, stacked in x, y, z order
    axis_suffixes = ('_acc_x.txt', '_acc_y.txt', '_acc_z.txt')
    axis_files = [group + suffix for suffix in axis_suffixes]
    X = load_group(axis_files, prefix)
    # class labels live in e.g. trainy.txt / testy.txt
    y = load_file(prefix + group + 'y.txt')
    return X, y
#load the dataset, returns train and test X and y elements
def load_dataset(prefix='WISDM/'):
    """Load the train and test groups and print a short summary.

    Args:
        prefix: String prepended to every data file name. Defaults to
            'WISDM/', the location this script previously had hard-coded,
            so existing calls without arguments behave as before.

    Returns:
        A tuple ``(trainX, trainy, testX, testy)`` as produced by
        ``load_dataset_group`` for the 'train' and 'test' groups.
    """
    # load all train
    trainX, trainy = load_dataset_group('train', prefix)
    print('trainX shape: ', trainX.shape, 'trainy shape :', trainy.shape)
    # load all test
    testX, testy = load_dataset_group('test', prefix)
    print('testX shape: ', testX.shape, 'testy shape', testy.shape)
    # number of training samples per class label
    print('train class',pd.DataFrame(trainy).groupby(0).size())
    return trainX, trainy, testX, testy
trainX, trainy, testX, testy = load_dataset()
# encode class as integers
# Fit the encoder exactly once, on the labels of both splits. Each fit() call
# replaces the previous one, so fitting on train and then on test would leave
# the encoder knowing only the test labels and transform(trainy) would raise
# for any class that appears in the training split alone.
le = LabelEncoder()
le.fit(np.concatenate([trainy, testy]))
int_trainy = le.transform(trainy)
int_testy = le.transform(testy)
# convert integers to one-hot encode
# Pin the width to the number of known classes so both splits always get the
# same number of columns, even when one split lacks the highest class index.
n_classes = len(le.classes_)
dum_trainy = np_utils.to_categorical(int_trainy, num_classes=n_classes)
dum_testy = np_utils.to_categorical(int_testy, num_classes=n_classes)
print('trainX shape: ', trainX.shape, 'trainy one hot shape :', dum_trainy.shape,
      '\ntestX shape: ', testX.shape, 'testy one hot shape: ', dum_testy.shape)
# build a model
def build_model(X, y):
    """Build and compile the three-channel CNN-LSTM classifier.

    Each channel has its own input of shape ``(X.shape[1], X.shape[2], 1)``,
    followed by two time-distributed 1-D convolutions, a time-distributed
    flatten and one LSTM. The three LSTM outputs are concatenated and fed to
    a softmax layer with ``y.shape[1]`` units.

    Args:
        X: Array whose axes 1 and 2 give the per-channel input layout; this
            script passes the training data after reshaping it to
            (samples, subsequences, steps, features).
        y: 2-D target array; its second dimension sets the number of output
            units (this script passes the one-hot training labels).

    Returns:
        A Keras ``Model`` taking a list of three inputs, compiled with
        categorical cross-entropy, the Adadelta optimizer and accuracy.
    """
    channel_shape = (X.shape[1], X.shape[2], 1)
    # (conv filters, conv kernel size, LSTM units) for channels 1, 2 and 3
    channel_specs = ((35, 3, 35), (20, 3, 30), (20, 5, 30))

    channel_inputs = []
    channel_outputs = []
    # layers are created channel by channel, in the same order as before
    for n_filters, kernel, lstm_units in channel_specs:
        entry = Input(shape=channel_shape)
        features = entry
        # two stacked convolutions applied to every subsequence
        for _ in range(2):
            features = TimeDistributed(
                Conv1D(filters=n_filters, kernel_size=kernel, activation='relu')
            )(features)
        features = TimeDistributed(Flatten())(features)
        channel_inputs.append(entry)
        channel_outputs.append(LSTM(lstm_units)(features))

    # merge
    merged = concatenate(channel_outputs)
    # interpretation
    outputs = Dense(y.shape[1], activation='softmax')(merged)
    model = Model(inputs=channel_inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='Adadelta', metrics=['accuracy'])
    return model
# reshape inputs from [samples, timesteps, features]
# into [samples, subsequences, timesteps, features]
train_samples, train_features = trainX.shape[0], trainX.shape[2]
# each window is cut into 4 subsequences of 20 steps; reshape raises if the
# timestep axis does not hold exactly subsequences * n_steps values
subsequences, n_steps = 4, 20
trainX = trainX.reshape((train_samples, subsequences, n_steps, train_features))
# training settings
verbose, epochs, batch_size = 0, 30, 128
# 'balanced' class weights computed from the integer training labels.
# Keyword arguments are required here: recent scikit-learn releases accept
# only class_weight positionally and reject positional classes / y.
weights = class_weight.compute_class_weight(class_weight='balanced',
                                            classes=np.unique(int_trainy),
                                            y=int_trainy)
# Keras expects a {class index: weight} mapping
dict_weights = dict(enumerate(weights))
print('dict_weights:\n',dict_weights)
# apply the same subsequence layout to the test inputs
test_samples, test_features = testX.shape[0], testX.shape[2]
testX = testX.reshape((test_samples, subsequences, n_steps, test_features))
# split the feature axis into the three single-channel model inputs
def _per_axis_inputs(windows):
    """Return [first feature, second feature, remaining features] of a 4-D array."""
    return [windows[..., :1], windows[..., 1:2], windows[..., 2:]]

# build the network from the reshaped training data and one-hot labels
model = build_model(trainX, dum_trainy)
# train once and report the wall-clock time spent in fit()
start = timer()
history = model.fit(
    _per_axis_inputs(trainX),
    dum_trainy,
    epochs=epochs,
    batch_size=batch_size,
    class_weight=dict_weights,
    verbose=verbose,
)
end = timer()
print('training time (s):', end - start)
# loss and accuracy on the held-out test split
test_loss, test_accuracy = model.evaluate(
    _per_axis_inputs(testX),
    dum_testy,
    batch_size=batch_size,
    verbose=0,
)
# express accuracy as a percentage
test_accuracy *= 100
print(f'>1: loss={test_loss}, accuracy={test_accuracy}')
#predict test set
# time only the forward pass over the three per-axis test inputs
start = timer()
pred = model.predict([testX[:,:,:,:1],testX[:,:,:,1:2],testX[:,:,:,2:]])
end = timer()
print('> testing time:',end-start)
# get the column index of max in each row, then transform to the label names
int_predy = np.argmax(pred, axis = 1)
pred_y = le.inverse_transform(int_predy)
# classification report (classification_report is imported at the top of the file)
target_names = ['Downstairs','Jogging','Sitting','Standing','Upstairs','Walking']
print(classification_report(int_testy, int_predy, target_names = target_names,digits=4))
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table
model.summary()
# optional: save a confusion matrix built from the string labels
# cm_analysis(testy, pred_y, filename='WISDM_pics/with_weights/with_weights_WISDM_Multi_hybrid_CM.png',
# labels=['Downstairs','Jogging','Sitting',
# 'Standing','Upstairs','Walking'], figsize=(8,8))