WISDM_CNN-LSTM.py
# cnn lstm model
import numpy as np
# set the seeds for consistency
seed = 1
np.random.seed(seed)  # set the numpy seed before importing keras
import random
random.seed(seed)  # set the built-in seed
import tensorflow as tf
tf.random.set_seed(seed)  # set the seed for tf
import pandas as pd
from numpy import dstack
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten, Dropout
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers import Conv1D
from keras.utils import to_categorical
from keras.utils import plot_model
from timeit import default_timer as timer
from matplotlib import pyplot as plt
from sklearn.utils import class_weight
from plot_confusion_matrix import cm_analysis
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
# load the labels file as a numpy array
def load_file(filepath):
    labels = np.loadtxt(filepath, dtype=str)
    return labels
# load a list of files and return as a 3d numpy array
def load_group(filenames, prefix=''):
    loaded = list()
    for name in filenames:
        data = np.loadtxt(prefix + name, ndmin=2)
        loaded.append(data)
    # stack group so that features are the 3rd dimension
    loaded = dstack(loaded)
    return loaded
# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
    # load all 3 files as a single array
    filenames = list()
    # acceleration
    filenames += [group + '_acc_x.txt', group + '_acc_y.txt', group + '_acc_z.txt']
    # load input data
    X = load_group(filenames, prefix)
    # load class output
    y = load_file(prefix + group + 'y.txt')  # trainy.txt / testy.txt
    return X, y
# load the dataset, returns train and test X and y elements
def load_dataset():
    # load all train
    trainX, trainy = load_dataset_group('train', 'WISDM/')
    print('trainX shape: ', trainX.shape, 'trainy shape :', trainy.shape)
    # load all test
    testX, testy = load_dataset_group('test', 'WISDM/')
    print('testX shape: ', testX.shape, 'testy shape', testy.shape)
    print('train class', pd.DataFrame(trainy).groupby(0).size())
    # one-hot encoding is done after integer label encoding below
    return trainX, trainy, testX, testy
trainX, trainy, testX, testy = load_dataset()
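# Sanity check on the loaded arrays (a sketch, assuming the three accelerometer
# channels stacked by load_group above): each split should be a 3-D array of
# (samples, timesteps, 3), and train/test windows should have matching shapes.
assert trainX.ndim == 3 and trainX.shape[2] == 3, 'expected (samples, timesteps, 3)'
assert trainX.shape[1:] == testX.shape[1:], 'train/test window shapes should match'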
# encode class labels as integers
le = LabelEncoder()
le.fit(trainy)  # fit once on the training labels; train and test share the same six classes
int_trainy = le.transform(trainy)
int_testy = le.transform(testy)
# convert integers to one-hot encoding
dum_trainy = to_categorical(int_trainy)
dum_testy = to_categorical(int_testy)
print('trainX shape: ', trainX.shape, 'trainy one hot shape :', dum_trainy.shape,
'\ntestX shape: ', testX.shape, 'testy one hot shape: ', dum_testy.shape)
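# Quick consistency check on the encodings: the argmax of each one-hot row should
# recover the integer label, and inverse_transform should recover the original strings.
assert (np.argmax(dum_trainy, axis=1) == int_trainy).all()
assert (le.inverse_transform(int_testy) == testy).all()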
# build a model
def build_model(X, y):
    # define model
    model = Sequential()
    # input shape is inferred on the first call to fit();
    # equivalently, pass input_shape=(X.shape[1], X.shape[2], X.shape[3])
    model.add(TimeDistributed(Conv1D(filters=35, kernel_size=3, activation='relu')))
    model.add(TimeDistributed(Conv1D(filters=35, kernel_size=3, activation='relu')))
    model.add(TimeDistributed(Flatten()))
    # model.add(LSTM(100, return_sequences=True))
    model.add(LSTM(50))
    model.add(Dense(y.shape[1], activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='Adadelta',
                  metrics=['accuracy'])
    # plot the model
    # plot_model(model, show_shapes=True, to_file='WISDM_Conv1D+LSTM.png')
    return model
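# Shape walkthrough (a sketch, assuming the 4 x 20 split applied below): TimeDistributed
# runs the wrapped layer on each subsequence independently, so a (batch, 4, 20, 3) input
# becomes (batch, 4, 18, 35) after the first Conv1D (kernel_size=3, 'valid' padding),
# (batch, 4, 16, 35) after the second, and (batch, 4, 560) after Flatten; the LSTM then
# reads the 4 flattened subsequence vectors as a length-4 sequence. To see the sizes:
# demo = build_model(np.zeros((1, 4, 20, 3)), np.zeros((1, 6)))
# demo.build(input_shape=(None, 4, 20, 3))
# demo.summary()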
# run an experiment
train_samples, train_features = trainX.shape[0], trainX.shape[2]
subsequences, n_steps = 4, 20  # split each 80-step window into 4 subsequences of 20 steps
trainX = trainX.reshape((train_samples, subsequences, n_steps, train_features))
test_samples, test_features = testX.shape[0], testX.shape[2]
testX = testX.reshape((test_samples, subsequences, n_steps, test_features))
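# The reshape only succeeds because each window has exactly subsequences * n_steps
# timesteps (4 * 20 = 80, the WISDM window length assumed here).
assert trainX.shape[1:] == (subsequences, n_steps, train_features)
assert testX.shape[1:] == (subsequences, n_steps, test_features)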
verbose, epochs, batch_size = 0, 30, 128
# compute balanced class weights (keyword arguments required by newer scikit-learn)
weights = class_weight.compute_class_weight(class_weight='balanced',
                                            classes=np.unique(int_trainy), y=int_trainy)
dict_weights = dict(enumerate(weights))
print('dict_weights:\n',dict_weights)
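# The 'balanced' heuristic weights each class by n_samples / (n_classes * class_count),
# so rarer activities get proportionally larger weights; a quick recomputation confirms it.
class_counts = np.bincount(int_trainy)
assert np.allclose(weights, len(int_trainy) / (len(class_counts) * class_counts))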
model = build_model(trainX, dum_trainy)
# fit the model
start = timer()
history = model.fit(trainX, dum_trainy, epochs=epochs, batch_size=batch_size,
verbose=verbose, class_weight=dict_weights)
end = timer()
print('> training time:',end-start)
# evaluate the model
test_loss, test_accuracy = model.evaluate(testX, dum_testy, batch_size=batch_size, verbose=1)
test_accuracy = test_accuracy * 100
print(f'> test: loss={test_loss:.4f}, accuracy={test_accuracy:.2f}%')
#predict test set
start = timer()
pred = model.predict(testX)
end = timer()
print('> testing time:',end-start)
# get the column index of max in each row, then transform to the label names
int_predy = np.argmax(pred, axis = 1)
pred_y = le.inverse_transform(int_predy)
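# Cross-check: accuracy recomputed from the predicted labels should match the value
# reported by model.evaluate above (up to rounding).
manual_accuracy = np.mean(int_predy == int_testy) * 100
print(f'> manual accuracy check: {manual_accuracy:.2f}%')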
# classification report
target_names = ['Downstairs', 'Jogging', 'Sitting', 'Standing', 'Upstairs', 'Walking']
print(classification_report(int_testy, int_predy, target_names=target_names, digits=4))
model.summary()
# # save confusion matrix
# cm_analysis(testy, pred_y, filename='WISDM_pics/with_weights/new_WISDM_Conv1D+LSTM_CM.png',
# labels=['Downstairs','Jogging','Sitting',
# 'Standing','Upstairs','Walking'], figsize=(8,8))