ucl_mlp_keras.py
# NOTE: this script targets the old Keras 0.x / Theano API
# (Dense(input_dim, output_dim), nb_epoch, predict_proba).

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import keras
import numpy as np
import theano
import linecache
import math
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error

rng = np.random
rng.seed(1234)

# parameters
train_size = 312437                      # training set size
test_size = 156063                       # test set size
train_file = './train.dl.bin.notag.txt'  # training file
test_file = './test.dl.bin.notag.txt'    # test file
n_epoch = 8                              # number of epochs
batch_size_train = 30                    # batch size for the train set
feats = 65                               # number of input features
n_layer_one = 361                        # size of the bottom (hidden) layer
activation_one = 'tanh'
drop_out = 0.3
log_file = './ucl_mlp_keras.log.txt'
lr = 0.005
momentum = 0.9
# n_feats_before_fm = 16                 # number of original features


# write logs for analysis
def write_logs(msg):
    with open(log_file, "a+") as myfile:
        myfile.write(msg + "\n")


write_logs('init weight:tanh num of layer one:' + str(n_layer_one)
           + '\tDrop out:' + str(drop_out)
           + '\tLearning rate:' + str(lr)
           + '\tmomentum:' + str(momentum)
           + '\tactivation one:' + str(activation_one))


# load a whole data set: each line is "<label>,<f1>,...,<f65>"
def load_data(data_file, data_size):
    array = []
    arrayY = []
    for i in range(data_size):
        line = linecache.getline(data_file, i + 1)
        if line.strip() != "":
            y = line[0:line.index(',')]
            x = line[line.index(',') + 1:]
            arr = [float(xx) for xx in x.split(',')]
            array.append(arr)
            arrayY.append(int(y))
    xarray = np.array(array, dtype=theano.config.floatX)
    yarray = np.array(arrayY, dtype=np.int32)
    return [xarray, yarray]


# get all test set
def getAllTestData():
    return load_data(test_file, test_size)


# get all train set
def getAllTrainData():
    return load_data(train_file, train_size)


# init train/test data
X_train, y_train = getAllTrainData()
y_train = y_train.tolist()
X_test, y_test = getAllTestData()
y_test = y_test.tolist()

# init weights: Glorot/Xavier uniform, U(-sqrt(6/(fan_in+fan_out)), +sqrt(...))
w1 = [np.array(rng.uniform(low=-np.sqrt(6. / (feats + n_layer_one)),
                           high=np.sqrt(6. / (feats + n_layer_one)),
                           size=(feats, n_layer_one)),
               dtype=theano.config.floatX),
      np.zeros(n_layer_one)]

# alternative hand-crafted initialisation (currently unused, see the
# commented-out assignment below)
ww = np.zeros((feats, n_layer_one))
for i in range(feats):
    for j in range(n_layer_one):
        if (i == 0 or (i % 4) == 1) and j == 0:
            ww[i, j] = 1
# w1 = [np.asarray(ww), np.zeros(n_layer_one)]

w2 = [np.ones((n_layer_one, 1)), np.zeros(1)]

# define the multi-layer perceptron
model = Sequential()
model.add(Dense(feats, n_layer_one, weights=w1))
model.add(Activation(activation_one))
model.add(Dropout(drop_out))
model.add(Dense(n_layer_one, 1, weights=w2))
model.add(Activation('sigmoid'))
sgd = SGD(lr=lr, decay=1e-6, momentum=momentum, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd)

# initial error, before any training
yp = model.predict_proba(X_test, batch_size=10).ravel()  # flatten the (n, 1) column for the sklearn metrics
auc = roc_auc_score(y_test, yp)
rmse = math.sqrt(mean_squared_error(y_test, yp))
print(' Init Test Err: ' + str(auc) + '\t' + str(rmse))
write_logs('Init Test Err: ' + str(auc) + '\t' + str(rmse))

best_auc = 0
weights = []


# print test set error
def get_evaluation():
    global best_auc
    global weights
    yp = model.predict_proba(X_test, batch_size=100).ravel()
    auc = roc_auc_score(y_test, yp)
    rmse = math.sqrt(mean_squared_error(y_test, yp))
    print(' Test Err: ' + str(auc) + '\t' + str(rmse))
    if auc > best_auc:
        best_auc = auc
        # snapshot the best weights (replace, don't append, so only the
        # current best model is kept)
        weights = [layer.get_weights() for layer in model.layers]
    write_logs(' Test Err: ' + str(auc) + '\t' + str(rmse))


# print train set error
def get_train_evaluation():
    yp = model.predict_proba(X_train, batch_size=100).ravel()
    auc = roc_auc_score(y_train, yp)
    rmse = math.sqrt(mean_squared_error(y_train, yp))
    print(' Train Err: ' + str(auc) + '\t' + str(rmse))
    write_logs(' \tTrain Err: ' + str(auc) + '\t' + str(rmse))


# collect error information after every epoch
class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_epoch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
        get_train_evaluation()
        get_evaluation()


# training
history = LossHistory()
model.fit(X_train, y_train, nb_epoch=n_epoch, batch_size=batch_size_train,
          callbacks=[history])

# proba = model.predict_proba(X_test, batch_size=32)
# score = model.evaluate(X_test, y_test, batch_size=16)
# print(history.losses)

print('Best test error of AUC: ' + str(best_auc))
write_logs('Best test error of AUC: ' + str(best_auc))
write_logs('')
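
# Input format (inferred from the parsing in load_data, not stated in the
# original file): each line is an integer 0/1 label followed by feats=65
# comma-separated float features, e.g.
#
#   1,0.03,0.0,1.0,...,0.27
#   0,0.00,1.0,0.0,...,0.91
#
# where "..." stands for the remaining feature columns; the values shown
# are made up for illustration only.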