diff --git a/AD_Invert.py b/AD_Invert.py new file mode 100644 index 0000000..09b0fb0 --- /dev/null +++ b/AD_Invert.py @@ -0,0 +1,151 @@ +import tensorflow as tf +import numpy as np +import pdb +import json +from mod_core_rnn_cell_impl import LSTMCell # modified to allow initializing bias in lstm + +import data_utils +import plotting +import model +import mmd +import utils +import eval +import DR_discriminator + +from differential_privacy.dp_sgd.dp_optimizer import dp_optimizer +from differential_privacy.dp_sgd.dp_optimizer import sanitizer +from differential_privacy.privacy_accountant.tf import accountant + +# --- get settings --- # +# parse command line arguments, or use defaults +parser = utils.rgan_options_parser() +settings = vars(parser.parse_args()) +# if a settings file is specified, it overrides command line arguments/defaults +if settings['settings_file']: settings = utils.load_settings_from_file(settings) + +# --- get data, split --- # +data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy' +print('Loading data from', data_path) +samples, labels, index = data_utils.get_data(settings["data"], settings["seq_length"], settings["seq_step"], + settings["num_signals"], settings["sub_id"], settings["eval_single"], + settings["eval_an"], data_path) +# --- save settings, data --- # +# no need +print('Ready to run with settings:') +for (k, v) in settings.items(): print(v, '\t', k) +# add the settings to local environment +# WARNING: at this point a lot of variables appear +locals().update(settings) +json.dump(settings, open('./experiments/settings/' + identifier + '.txt', 'w'), indent=0) + +class myADclass(): + def __init__(self, epoch, settings=settings, samples=samples, labels=labels, index=index): + self.epoch = epoch + self.settings = settings + self.samples = samples + self.labels = labels + self.index = index + def ADfunc(self): + num_samples_t = self.samples.shape[0] + t_size = 500 + T_index = np.random.choice(num_samples_t, size=t_size, replace=False) + print('sample_shape:', self.samples.shape[0]) + print('num_samples_t', num_samples_t) + + # -- only discriminate one batch for one time -- # + D_test = np.empty([t_size, self.settings['seq_length'], 1]) + DL_test = np.empty([t_size, self.settings['seq_length'], 1]) + GG = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']]) + T_samples = np.empty([t_size, self.settings['seq_length'], self.settings['num_signals']]) + L_mb = np.empty([t_size, self.settings['seq_length'], 1]) + I_mb = np.empty([t_size, self.settings['seq_length'], 1]) + for batch_idx in range(0, t_size): + # print('epoch:{}'.format(self.epoch)) + # print('batch_idx:{}'.format(batch_idx)) + # display batch progress + model.display_batch_progression(batch_idx, t_size) + T_mb = self.samples[T_index[batch_idx], :, :] + L_mmb = self.labels[T_index[batch_idx], :, :] + I_mmb = self.index[T_index[batch_idx], :, :] + para_path = './experiments/parameters/' + self.settings['sub_id'] + '_' + str( + self.settings['seq_length']) + '_' + str(self.epoch) + '.npy' + D_t, L_t = DR_discriminator.dis_D_model(self.settings, T_mb, para_path) + Gs, Zs, error_per_sample, heuristic_sigma = DR_discriminator.invert(self.settings, T_mb, para_path, + g_tolerance=None, + e_tolerance=0.1, n_iter=None, + max_iter=1000, + heuristic_sigma=None) + GG[batch_idx, :, :] = Gs + T_samples[batch_idx, :, :] = T_mb + D_test[batch_idx, :, :] = D_t + DL_test[batch_idx, :, :] = L_t + L_mb[batch_idx, :, :] = L_mmb + I_mb[batch_idx, :, :] = I_mmb + + # -- use 
self-defined evaluation functions -- # + # -- test different tao values for the detection function -- # + results = np.empty([5, 5]) + # for i in range(2, 8): + # tao = 0.1 * i + tao = 0.5 + lam = 0.8 + Accu1, Pre1, Rec1, F11, FPR1, D_L1 = DR_discriminator.detection_D_I(DL_test, L_mb, I_mb, self.settings['seq_step'], tao) + print('seq_length:', self.settings['seq_length']) + print('D:Comb-logits-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' + .format(self.epoch, tao, Accu1, Pre1, Rec1, F11, FPR1)) + results[0, :] = [Accu1, Pre1, Rec1, F11, FPR1] + + Accu2, Pre2, Rec2, F12, FPR2, D_L2 = DR_discriminator.detection_D_I(D_test, L_mb, I_mb, self.settings['seq_step'], tao) + print('seq_length:', self.settings['seq_length']) + print('D:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' + .format(self.epoch, tao, Accu2, Pre2, Rec2, F12, FPR2)) + results[1, :] = [Accu2, Pre2, Rec2, F12, FPR2] + + Accu3, Pre3, Rec3, F13, FPR3, D_L3 = DR_discriminator.detection_R_D_I(DL_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam) + print('seq_length:', self.settings['seq_length']) + print('RD:Comb-logits_based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' + .format(self.epoch, tao, Accu3, Pre3, Rec3, F13, FPR3)) + results[2, :] = [Accu3, Pre3, Rec3, F13, FPR3] + + Accu4, Pre4, Rec4, F14, FPR4, D_L4 = DR_discriminator.detection_R_D_I(D_test, GG, T_samples, L_mb, self.settings['seq_step'], tao, lam) + print('seq_length:', self.settings['seq_length']) + print('RD:Comb-statistic-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' + .format(self.epoch, tao, Accu4, Pre4, Rec4, F14, FPR4)) + results[3, :] = [Accu4, Pre4, Rec4, F14, FPR4] + + Accu5, Pre5, Rec5, F15, FPR5, D_L5 = DR_discriminator.detection_R_I(GG, T_samples, L_mb, self.settings['seq_step'],tao) + print('seq_length:', self.settings['seq_length']) + print('G:Comb-sample-based-Epoch: {}; tao={:.1}; Accu: {:.4}; Pre: {:.4}; Rec: {:.4}; F1: {:.4}; FPR: {:.4}' + .format(self.epoch, tao, Accu5, Pre5, Rec5, F15, FPR5)) + results[4, :] = [Accu5, Pre5, Rec5, F15, FPR5] + + return results, GG, D_test, DL_test + + + +if __name__ == "__main__": + print('Main Starting...') + + Results = np.empty([settings['num_epochs'], 5, 5]) + + t_size = 500 + D_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1]) + DL_test = np.empty([settings['num_epochs'], t_size, settings['seq_length'], 1]) + GG = np.empty([settings['num_epochs'], t_size, settings['seq_length'], settings['num_signals']]) + + for epoch in range(settings['num_epochs']): + # for epoch in range(1): + ob = myADclass(epoch) + Results[epoch, :, :], GG[epoch, :, :, :], D_test[epoch, :, :, :], DL_test[epoch, :, :, :] = ob.ADfunc() + + res_path = './experiments/plots/Results_Invert' + '_' + settings['sub_id'] + '_' + str( + settings['seq_length']) + '.npy' + np.save(res_path, Results) + + dg_path = './experiments/plots/DG_Invert' + '_' + settings['sub_id'] + '_' + str( + settings['seq_length']) + '_' + np.save(dg_path + 'D_test.npy', D_test) + np.save(dg_path + 'DL_test.npy', DL_test) + np.save(dg_path + 'GG.npy', DL_test) + + print('Main Terminating...') \ No newline at end of file diff --git a/DR_discriminator.py b/DR_discriminator.py new file mode 100644 index 0000000..c943ab8 --- /dev/null +++ b/DR_discriminator.py @@ -0,0 +1,794 @@ +import numpy as np +import tensorflow as tf +import matplotlib.pyplot as plt +from 
matplotlib.colors import hsv_to_rgb +import model +import mmd +from mod_core_rnn_cell_impl import LSTMCell +from sklearn.metrics import precision_recall_fscore_support + +def anomaly_detection_plot(D_test, T_mb, L_mb, D_L, epoch, identifier): + + aa = D_test.shape[0] + bb = D_test.shape[1] + D_L = D_L.reshape([aa, bb, -1]) + + x_points = np.arange(bb) + + fig, ax = plt.subplots(4, 4, sharex=True) + for m in range(4): + for n in range(4): + D = D_test[n * 4 + m, :, :] + T = T_mb[n * 4 + m, :, :] + L = L_mb[n * 4 + m, :, :] + DL = D_L[n * 4 + m, :, :] + ax[m, n].plot(x_points, D, '--g', label='Pro') + ax[m, n].plot(x_points, T, 'b', label='Data') + ax[m, n].plot(x_points, L, 'k', label='Label') + ax[m, n].plot(x_points, DL, 'r', label='Label') + ax[m, n].set_ylim(-1, 1) + for n in range(4): + ax[-1, n].xaxis.set_ticks(range(0, bb, int(bb/6))) + fig.suptitle(epoch) + fig.subplots_adjust(hspace=0.15) + fig.savefig("./experiments/plots/DR_dis/" + identifier + "_epoch" + str(epoch).zfill(4) + ".png") + plt.clf() + plt.close() + + return True + +def detection_Comb(Label_test, L_mb, I_mb, seq_step, tao): + aa = Label_test.shape[0] + bb = Label_test.shape[1] + + LL = (aa-1)*seq_step+bb + + Label_test = abs(Label_test.reshape([aa, bb])) + L_mb = L_mb .reshape([aa, bb]) + I_mb = I_mb .reshape([aa, bb]) + D_L = np.zeros([LL, 1]) + L_L = np.zeros([LL, 1]) + Count = np.zeros([LL, 1]) + for i in range(0, aa): + for j in range(0, bb): + # print('index:', i*10+j) + D_L[i*seq_step+j] += Label_test[i, j] + L_L[i * seq_step + j] += L_mb[i, j] + Count[i * seq_step + j] += 1 + + D_L /= Count + L_L /= Count + + TP, TN, FP, FN = 0, 0, 0, 0 + + for i in range(LL): + if D_L[i] > tao: + # true/negative + D_L[i] = 0 + else: + # false/positive + D_L[i] = 1 + + cc = (D_L == L_L) + # print('D_L:', D_L) + # print('L_L:', L_L) + cc = list(cc.reshape([-1])) + N = cc.count(True) + + print('N:', N) + + Accu = float((N / LL) * 100) + + precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary') + + return Accu, precision, recall, f1, + + +def detection_logits_I(DL_test, L_mb, I_mb, seq_step, tao): + aa = DL_test.shape[0] + bb = DL_test.shape[1] + + LL = (aa-1)*seq_step+bb + + DL_test = abs(DL_test.reshape([aa, bb])) + L_mb = L_mb .reshape([aa, bb]) + I_mb = I_mb .reshape([aa, bb]) + D_L = np.zeros([LL, 1]) + L_L = np.zeros([LL, 1]) + Count = np.zeros([LL, 1]) + for i in range(0, aa): + for j in range(0, bb): + # print('index:', i*10+j) + D_L[i*seq_step+j] += DL_test[i, j] + L_L[i * seq_step + j] += L_mb[i, j] + Count[i * seq_step + j] += 1 + + D_L /= Count + L_L /= Count + + TP, TN, FP, FN = 0, 0, 0, 0 + + for i in range(LL): + if D_L[i] > tao: + # true/negative + D_L[i] = 0 + else: + # false/positive + D_L[i] = 1 + + A = D_L[i] + B = L_L[i] + if A == 1 and B == 1: + TP += 1 + elif A == 1 and B == 0: + FP += 1 + elif A == 0 and B == 0: + TN += 1 + elif A == 0 and B == 1: + FN += 1 + + + cc = (D_L == L_L) + # print('D_L:', D_L) + # print('L_L:', L_L) + cc = list(cc.reshape([-1])) + N = cc.count(True) + + print('N:', N) + + Accu = float((N / LL) * 100) + + precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary') + + # true positive among all the detected positive + # Pre = (100 * TP) / (TP + FP + 1) + # # true positive among all the real positive + # Rec = (100 * TP) / (TP + FN + 1) + # # The F1 score is the harmonic average of the precision and recall, + # # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. 
+ # F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) + # False positive rate--false alarm rate + FPR = (100 * FP) / (FP + TN+1) + + return Accu, precision, recall, f1, FPR, D_L + +def detection_statistic_I(D_test, L_mb, I_mb, seq_step, tao): + # point-wise detection for one dimension + + aa = D_test.shape[0] + bb = D_test.shape[1] + + LL = (aa-1) * seq_step + bb + # print('aa:', aa) + # print('bb:', bb) + # print('LL:', LL) + + D_test = D_test.reshape([aa, bb]) + L_mb = L_mb.reshape([aa, bb]) + I_mb = I_mb.reshape([aa, bb]) + D_L = np.zeros([LL, 1]) + L_L = np.zeros([LL, 1]) + Count = np.zeros([LL, 1]) + for i in range(0, aa): + for j in range(0, bb): + # print('index:', i * 10 + j) + D_L[i * seq_step + j] += D_test[i, j] + L_L[i * seq_step + j] += L_mb[i, j] + Count[i * seq_step + j] += 1 + + D_L /= Count + L_L /= Count + + TP, TN, FP, FN = 0, 0, 0, 0 + + for i in range(LL): + if D_L[i] > tao: + # true/negative + D_L[i] = 0 + else: + # false/positive + D_L[i] = 1 + + A = D_L[i] + B = L_L[i] + if A == 1 and B == 1: + TP += 1 + elif A == 1 and B == 0: + FP += 1 + elif A == 0 and B == 0: + TN += 1 + elif A == 0 and B == 1: + FN += 1 + + cc = (D_L == L_L) + cc = list(cc.reshape([-1])) + N = cc.count(True) + Accu = float((N / LL) * 100) + + precision, recall, f1, _ = precision_recall_fscore_support(L_L, D_L, average='binary') + + # true positive among all the detected positive + # Pre = (100 * TP) / (TP + FP + 1) + # # true positive among all the real positive + # Rec = (100 * TP) / (TP + FN + 1) + # # The F1 score is the harmonic average of the precision and recall, + # # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. + # F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) + # False positive rate--false alarm rate + FPR = (100 * FP) / (FP + TN) + + return Accu, precision, recall, f1, FPR, D_L + +def detection_D_I(DD, L_mb, I_mb, seq_step, tao): + # point-wise detection for one dimension + + aa = DD.shape[0] + bb = DD.shape[1] + + LL = (aa-1)*seq_step+bb + + DD = abs(DD.reshape([aa, bb])) + L_mb = L_mb .reshape([aa, bb]) + I_mb = I_mb .reshape([aa, bb]) + D_L = np.zeros([LL, 1]) + L_L = np.zeros([LL, 1]) + Count = np.zeros([LL, 1]) + for i in range(0, aa): + for j in range(0, bb): + # print('index:', i*10+j) + D_L[i*10+j] += DD[i, j] + L_L[i * 10 + j] += L_mb[i, j] + Count[i * 10 + j] += 1 + + D_L /= Count + L_L /= Count + + TP, TN, FP, FN = 0, 0, 0, 0 + + for i in range(LL): + if D_L[i] > tao: + # true/negative + D_L[i] = 0 + else: + # false/positive + D_L[i] = 1 + + A = D_L[i] + B = L_L[i] + if A == 1 and B == 1: + TP += 1 + elif A == 1 and B == 0: + FP += 1 + elif A == 0 and B == 0: + TN += 1 + elif A == 0 and B == 1: + FN += 1 + + + cc = (D_L == L_L) + # print('D_L:', D_L) + # print('L_L:', L_L) + cc = list(cc.reshape([-1])) + N = cc.count(True) + + print('N:', N) + + Accu = float((N / LL) * 100) + + # true positive among all the detected positive + Pre = (100 * TP) / (TP + FP + 1) + # true positive among all the real positive + Rec = (100 * TP) / (TP + FN + 1) + # The F1 score is the harmonic average of the precision and recall, + # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. + F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) + # False positive rate--false alarm rate + FPR = (100 * FP) / (FP + TN+1) + + return Accu, Pre, Rec, F1, FPR, D_L + +def detection_R_D_I(DD, Gs, T_mb, L_mb, seq_step, tao, lam): + # point-wise detection for one dimension + # (1-lambda)*R(x)+lambda*D(x) + # lambda=0.5? 
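+    # Combined score per reconstructed point: s = (1 - lam) * R + lam * (1 - D),
+    # where R is the mean absolute reconstruction residual across signals and D is
+    # the (absolute) discriminator output; a point is flagged anomalous when s > tao.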
+ # D_test, Gs, T_mb, L_mb are of same size + + R = np.absolute(Gs - T_mb) + R = np.mean(R, axis=2) + aa = DD.shape[0] + bb = DD.shape[1] + + LL = (aa - 1) * seq_step + bb + + DD = abs(DD.reshape([aa, bb])) + DD = 1-DD + L_mb = L_mb.reshape([aa, bb]) + R = R.reshape([aa, bb]) + + D_L = np.zeros([LL, 1]) + R_L = np.zeros([LL, 1]) + L_L = np.zeros([LL, 1]) + L_pre = np.zeros([LL, 1]) + Count = np.zeros([LL, 1]) + for i in range(0, aa): + for j in range(0, bb): + # print('index:', i*10+j) + D_L[i * 10 + j] += DD[i, j] + L_L[i * 10 + j] += L_mb[i, j] + R_L[i * 10 + j] += R[i, j] + Count[i * 10 + j] += 1 + D_L /= Count + L_L /= Count + R_L /= Count + + TP, TN, FP, FN = 0, 0, 0, 0 + + for i in range(LL): + if (1-lam)*R_L[i] + lam*D_L[i] > tao: + # false + L_pre[i] = 1 + else: + # true + L_pre[i] = 0 + + A = L_pre[i] + # print('A:', A) + B = L_L[i] + # print('B:', B) + if A == 1 and B == 1: + TP += 1 + elif A == 1 and B == 0: + FP += 1 + elif A == 0 and B == 0: + TN += 1 + elif A == 0 and B == 1: + FN += 1 + + cc = (L_pre == L_L) + cc = list(cc.reshape([-1])) + N = cc.count(True) + Accu = float((N / (aa*bb)) * 100) + + # true positive among all the detected positive + Pre = (100 * TP) / (TP + FP + 1) + # true positive among all the real positive + Rec = (100 * TP) / (TP + FN + 1) + # The F1 score is the harmonic average of the precision and recall, + # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. + F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) + # False positive rate + FPR = (100 * FP) / (FP + TN+1) + + return Accu, Pre, Rec, F1, FPR, L_pre + +def detection_R_I(Gs, T_mb, L_mb, seq_step, tao): + # point-wise detection for one dimension + # (1-lambda)*R(x)+lambda*D(x) + # lambda=0.5? + # D_test, Gs, T_mb, L_mb are of same size + + R = np.absolute(Gs - T_mb) + R = np.mean(R, axis=2) + aa = R.shape[0] + bb = R.shape[1] + + LL = (aa - 1) * seq_step + bb + + L_mb = L_mb.reshape([aa, bb]) + R = R.reshape([aa, bb]) + + L_L = np.zeros([LL, 1]) + R_L = np.zeros([LL, 1]) + L_pre = np.zeros([LL, 1]) + Count = np.zeros([LL, 1]) + for i in range(0, aa): + for j in range(0, bb): + # print('index:', i*10+j) + L_L[i * 10 + j] += L_mb[i, j] + R_L[i * 10 + j] += R[i, j] + Count[i * 10 + j] += 1 + L_L /= Count + R_L /= Count + + TP, TN, FP, FN = 0, 0, 0, 0 + + for i in range(LL): + if R_L[i] > tao: + # false + L_pre[i] = 1 + else: + # true + L_pre[i] = 0 + + A = L_pre[i] + B = L_L[i] + if A == 1 and B == 1: + TP += 1 + elif A == 1 and B == 0: + FP += 1 + elif A == 0 and B == 0: + TN += 1 + elif A == 0 and B == 1: + FN += 1 + + cc = (L_pre == L_L) + cc = list(cc.reshape([-1])) + N = cc.count(True) + Accu = float((N / (aa*bb)) * 100) + + # true positive among all the detected positive + Pre = (100 * TP) / (TP + FP + 1) + # true positive among all the real positive + Rec = (100 * TP) / (TP + FN + 1) + # The F1 score is the harmonic average of the precision and recall, + # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. 
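+    # The +1 terms below guard against zero denominators; Pre, Rec and FPR are
+    # reported on a 0-100 scale, while the extra factor of 100 rescales F1 to [0, 1].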
+ F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) + # False positive rate + FPR = (100 * FP) / (FP + TN+1) + + return Accu, Pre, Rec, F1, FPR, L_pre + + +def sample_detection(D_test, L_mb, tao): + # sample-wise detection for one dimension + + aa = D_test.shape[0] + bb = D_test.shape[1] + + D_test = D_test.reshape([aa, bb]) + L_mb = L_mb.reshape([aa, bb]) + L = np.sum(L_mb, 1) + # NN = 0-10 + L[L > 0] = 1 + + D_L = np.empty([aa, ]) + + for i in range(aa): + if np.mean(D_test[i, :]) > tao: + # true/negative + D_L[i] = 0 + else: + # false/positive + D_L[i] = 1 + + cc = (D_L == L) + # cc = list(cc) + N = list(cc).count(True) + Accu = float((N / (aa)) * 100) + + precision, recall, f1, _ = precision_recall_fscore_support(L, D_L, average='binary') + + return Accu, precision, recall, f1 + + +def CUSUM_det(spe_n, spe_a, labels): + + mu = np.mean(spe_n) + sigma = np.std(spe_n) + + kk = 3*sigma + H = 15*sigma + print('H:', H) + + tar = np.mean(spe_a) + + mm = spe_a.shape[0] + + SH = np.empty([mm, ]) + SL = np.empty([mm, ]) + + for i in range(mm): + SH[-1] = 0 + SL[-1] = 0 + SH[i] = max(0, SH[i-1]+spe_a[i]-(tar+kk)) + SL[i] = min(0, SL[i-1]+spe_a[i]-(tar-kk)) + + + count = np.empty([mm, ]) + TP = 0 + TN = 0 + FP = 0 + FN = 0 + for i in range(mm): + A = SH[i] + B = SL[i] + AA = H + BB = -H + if A <= AA and B >= BB: + count[i] = 0 + else: + count[i] = 1 + + C = count[i] + D = labels[i] + if C == 1 and D == 1: + TP += 1 + elif C == 1 and D == 0: + FP += 1 + elif C == 0 and D == 0: + TN += 1 + elif C == 0 and D == 1: + FN += 1 + + cc = (count == labels) + # cc = list(cc) + N = list(cc).count(True) + Accu = float((N / (mm)) * 100) + + # true positive among all the detected positive + Pre = (100 * TP) / (TP + FP + 1) + # true positive among all the real positive + Rec = (100 * TP) / (TP + FN) + # The F1 score is the harmonic average of the precision and recall, + # where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0. + F1 = (2 * Pre * Rec) / (100 * (Pre + Rec + 1)) + # False positive rate + FPR = (100 * FP) / (FP + TN) + + return Accu, Pre, Rec, F1, FPR + + +def SPE(X, pc): + a = X.shape[0] + b = X.shape[1] + + spe = np.empty([a]) + # Square Prediction Error (square of residual distance) + # spe = X'(I-PP')X + I = np.identity(b, float) - np.matmul(pc.transpose(1, 0), pc) + # I = np.matmul(I, I) + for i in range(a): + x = X[i, :].reshape([b, 1]) + y = np.matmul(x.transpose(1, 0), I) + spe[i] = np.matmul(y, x) + + return spe + + + +def generator_o(z, hidden_units_g, seq_length, batch_size, num_generated_features, reuse=False, parameters=None, learn_scale=True): + """ + If parameters are supplied, initialise as such + """ + # It is important to specify different variable scopes for the LSTM cells. 
+ with tf.variable_scope("generator_o") as scope: + + W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0']) + b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0']) + try: + scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0']) + except KeyError: + scale_out_G_initializer = tf.constant_initializer(value=1) + assert learn_scale + lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0']) + bias_start = parameters['generator/rnn/lstm_cell/biases:0'] + + W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_generated_features], initializer=W_out_G_initializer) + b_out_G = tf.get_variable(name='b_out_G', shape=num_generated_features, initializer=b_out_G_initializer) + scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, trainable=False) + + inputs = z + + cell = LSTMCell(num_units=hidden_units_g, + state_is_tuple=True, + initializer=lstm_initializer, + bias_start=bias_start, + reuse=reuse) + rnn_outputs, rnn_states = tf.nn.dynamic_rnn( + cell=cell, + dtype=tf.float32, + sequence_length=[seq_length] * batch_size, + inputs=inputs) + rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g]) + logits_2d = tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G #out put weighted sum + output_2d = tf.nn.tanh(logits_2d) # logits operation [-1, 1] + output_3d = tf.reshape(output_2d, [-1, seq_length, num_generated_features]) + return output_3d + + +def discriminator_o(x, hidden_units_d, reuse=False, parameters=None): + + with tf.variable_scope("discriminator_0") as scope: + + W_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/W_out_D:0']) + b_out_D_initializer = tf.constant_initializer(value=parameters['discriminator/b_out_D:0']) + + W_out_D = tf.get_variable(name='W_out_D', shape=[hidden_units_d, 1], initializer=W_out_D_initializer) + b_out_D = tf.get_variable(name='b_out_D', shape=1, initializer=b_out_D_initializer) + + + inputs = x + + cell = tf.contrib.rnn.LSTMCell(num_units=hidden_units_d, state_is_tuple=True, reuse=reuse) + + rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell=cell, dtype=tf.float32, inputs=inputs) + + + logits = tf.einsum('ijk,km', rnn_outputs, W_out_D) + b_out_D # output weighted sum + + output = tf.nn.sigmoid(logits) # y = 1 / (1 + exp(-x)). output activation [0, 1]. Probability?? + # sigmoid output ([0,1]), Probability? 
+ + return output, logits + + +def invert(settings, samples, para_path, g_tolerance=None, e_tolerance=0.1, + n_iter=None, max_iter=10000, heuristic_sigma=None): + """ + Return the latent space points corresponding to a set of a samples + ( from gradient descent ) + Note: this function is designed for ONE sample generation + """ + # num_samples = samples.shape[0] + # cast samples to float32 + + samples = np.float32(samples) + + # get the model + # if settings is a string, assume it's an identifier and load + if type(settings) == str: + settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r')) + + + + # print('Inverting', 1, 'samples using model', settings['identifier'], 'at epoch', epoch,) + # if not g_tolerance is None: + # print('until gradient norm is below', g_tolerance) + # else: + # print('until error is below', e_tolerance) + + + # get parameters + parameters = model.load_parameters(para_path) + # # assertions + # assert samples.shape[2] == settings['num_generated_features'] + # create VARIABLE Z + Z = tf.get_variable(name='Z', shape=[1, settings['seq_length'], + settings['latent_dim']], + initializer=tf.random_normal_initializer()) + # create outputs + + G_samples = generator_o(Z, settings['hidden_units_g'], settings['seq_length'], + 1, settings['num_generated_features'], + reuse=False, parameters=parameters) + # generator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'num_generated_features', 'cond_dim', 'learn_scale'] + # generator_settings = dict((k, settings[k]) for k in generator_vars) + # G_samples = model.generator(Z, **generator_settings, reuse=True) + + fd = None + + # define loss mmd-based loss + if heuristic_sigma is None: + heuristic_sigma = mmd.median_pairwise_distance_o(samples) # this is noisy + print('heuristic_sigma:', heuristic_sigma) + samples = tf.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']]) + Kxx, Kxy, Kyy, wts = mmd._mix_rbf_kernel(G_samples, samples, sigmas=tf.constant(value=heuristic_sigma, shape=(1, 1))) + similarity_per_sample = tf.diag_part(Kxy) + reconstruction_error_per_sample = 1 - similarity_per_sample + # reconstruction_error_per_sample = tf.reduce_sum((tf.nn.l2_normalize(G_samples, dim=1) - tf.nn.l2_normalize(samples, dim=1))**2, axis=[1,2]) + similarity = tf.reduce_mean(similarity_per_sample) + reconstruction_error = 1 - similarity + # updater + # solver = tf.train.AdamOptimizer().minimize(reconstruction_error_per_sample, var_list=[Z]) + # solver = tf.train.RMSPropOptimizer(learning_rate=500).minimize(reconstruction_error, var_list=[Z]) + solver = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(reconstruction_error_per_sample, var_list=[Z]) + # solver = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9).minimize(reconstruction_error_per_sample, var_list=[Z]) + + grad_Z = tf.gradients(reconstruction_error_per_sample, Z)[0] + grad_per_Z = tf.norm(grad_Z, axis=(1, 2)) + grad_norm = tf.reduce_mean(grad_per_Z) + # solver = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(reconstruction_error, var_list=[Z]) + print('Finding latent state corresponding to samples...') + + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + error = sess.run(reconstruction_error, feed_dict=fd) + g_n = sess.run(grad_norm, feed_dict=fd) + # print(g_n) + i = 0 + if not n_iter is None: + while i < n_iter: + _ = sess.run(solver, feed_dict=fd) + error = sess.run(reconstruction_error, feed_dict=fd) + 
i += 1 + else: + if not g_tolerance is None: + while g_n > g_tolerance: + _ = sess.run(solver, feed_dict=fd) + error, g_n = sess.run([reconstruction_error, grad_norm], feed_dict=fd) + i += 1 + print(error, g_n) + if i > max_iter: + break + else: + while np.abs(error) > e_tolerance: + _ = sess.run(solver, feed_dict=fd) + error = sess.run(reconstruction_error, feed_dict=fd) + i += 1 + # print(error) + if i > max_iter: + break + Zs = sess.run(Z, feed_dict=fd) + Gs = sess.run(G_samples, feed_dict={Z: Zs}) + error_per_sample = sess.run(reconstruction_error_per_sample, feed_dict=fd) + print('Z found in', i, 'iterations with final reconstruction error of', error) + tf.reset_default_graph() + + return Gs, Zs, error_per_sample, heuristic_sigma + + +def dis_trained_model(settings, samples, para_path): + """ + Return the discrimination results of num_samples testing samples from a trained model described by settings dict + Note: this function is designed for ONE sample discrimination + """ + + # if settings is a string, assume it's an identifier and load + if type(settings) == str: + settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r')) + + num_samples = samples.shape[0] + samples = np.float32(samples) + num_variables = samples.shape[2] + # samples = np.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']]) + + # get the parameters, get other variables + # parameters = model.load_parameters(settings['sub_id'] + '_' + str(settings['seq_length']) + '_' + str(epoch)) + parameters = model.load_parameters(para_path) + # settings['sub_id'] + '_' + str(settings['seq_length']) + '_' + str(epoch) + + # create placeholder, T samples + # T = tf.placeholder(tf.float32, [settings['batch_size'], settings['seq_length'], settings['num_generated_features']]) + + T = tf.placeholder(tf.float32, [num_samples, settings['seq_length'], num_variables]) + + # create the discriminator (GAN) + # normal GAN + D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters) + + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + # with tf.device('/gpu:1'): + with tf.Session(config=config) as sess: + sess.run(tf.global_variables_initializer()) + D_t, L_t = sess.run([D_t, L_t], feed_dict={T: samples}) + + tf.reset_default_graph() + return D_t, L_t + +def dis_D_model(settings, samples, para_path): + """ + Return the discrimination results of num_samples testing samples from a trained model described by settings dict + Note: this function is designed for ONE sample discrimination + """ + + # if settings is a string, assume it's an identifier and load + if type(settings) == str: + settings = json.load(open('./experiments/settings/' + settings + '.txt', 'r')) + + # num_samples = samples.shape[0] + samples = np.float32(samples) + samples = np.reshape(samples, [1, settings['seq_length'], settings['num_generated_features']]) + + # get the parameters, get other variables + parameters = model.load_parameters(para_path) + # create placeholder, T samples + + T = tf.placeholder(tf.float32, [1, settings['seq_length'], settings['num_generated_features']]) + + # create the discriminator (GAN or CGAN) + # normal GAN + D_t, L_t = discriminator_o(T, settings['hidden_units_d'], reuse=False, parameters=parameters) + # D_t, L_t = model.discriminator(T, settings['hidden_units_d'], settings['seq_length'], num_samples, reuse=False, + # parameters=parameters, cond_dim=0, c=None, batch_mean=False) + + config = tf.ConfigProto() + config.gpu_options.allow_growth 
= True + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + D_t, L_t = sess.run([D_t, L_t], feed_dict={T: samples}) + + tf.reset_default_graph() + return D_t, L_t \ No newline at end of file diff --git a/data_utils.py b/data_utils.py new file mode 100644 index 0000000..5122361 --- /dev/null +++ b/data_utils.py @@ -0,0 +1,639 @@ +import numpy as np +import pandas as pd +import pdb +import re +from time import time +import json +import random + +import model + +from scipy.spatial.distance import pdist, squareform +from scipy.stats import multivariate_normal, invgamma, mode +from scipy.special import gamma +from scipy.misc.pilutil import imresize +from functools import partial +from math import ceil + +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.preprocessing import MinMaxScaler +from sklearn import preprocessing + + +# --- deal with the SWaT data --- # +def swat(seq_length, seq_step, num_signals, randomize=False): + """ Load and serialise """ + # train = np.load('./data/swat.npy') + # print('Loaded swat from .npy') + train = np.loadtxt(open('./data/swat.csv'), delimiter=',') + print('Loaded swat from .csv') + m, n = train.shape # m=496800, n=52 + for i in range(n - 1): + A = max(train[:, i]) + if A != 0: + train[:, i] /= max(train[:, i]) + # scale from -1 to 1 + train[:, i] = 2 * train[:, i] - 1 + else: + train[:, i] = train[:, i] + + samples = train[21600:, 0:n-1] + labels = train[21600:, n-1] # the last colummn is label + ############################# + # -- choose variable for uni-variate GAN-AD -- # + # samples = samples[:, [1, 8, 18, 28]] + ############################ + # -- apply PCA dimension reduction for multi-variate GAN-AD -- # + from sklearn.decomposition import PCA + # ALL SENSORS IDX + # XS = [0, 1, 5, 6, 7, 8, 16, 17, 18, 25, 26, 27, 28, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 45, 46, 47] + # X_n = samples[:, XS] + # X_a = samples_a[:, XS] + # All VARIABLES + X_n = samples + #################################### + ################################### + # -- the best PC dimension is chosen pc=5 -- # + n_components = num_signals + pca = PCA(n_components, svd_solver='full') + pca.fit(X_n) + ex_var = pca.explained_variance_ratio_ + pc = pca.components_ + + # projected values on the principal component + T_n = np.matmul(X_n, pc.transpose(1, 0)) + samples = T_n + + # # only for one-dimensional + # samples = T_n.reshape([samples.shape[0], ]) + ########################################### + ########################################### + # seq_length = 7200 + num_samples = (samples.shape[0]-seq_length)//seq_step + print("num_samples:", num_samples) + print("num_signals:", num_signals) + aa = np.empty([num_samples, seq_length, num_signals]) + bb = np.empty([num_samples, seq_length, 1]) + + for j in range(num_samples): + bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1]) + for i in range(num_signals): + aa[j, :, i] = samples[(j * seq_step):(j*seq_step + seq_length), i] + + # samples = aa[:, 0:7200:200, :] + # labels = bb[:, 0:7200:200, :] + samples = aa + labels = bb + + return samples, labels + +def swat_birgan(seq_length, seq_step, num_signals, randomize=False): + """ Load and serialise """ + # train = np.load('./data/swat.npy') + # print('Loaded swat from .npy') + train = np.loadtxt(open('./data/swat.csv'), delimiter=',') + print('Loaded swat from .csv') + m, n = train.shape # m=496800, n=52 + for i in range(n - 1): + A = max(train[:, i]) + if A != 0: + train[:, i] /= max(train[:, i]) + # scale from -1 to 1 + 
train[:, i] = 2 * train[:, i] - 1 + else: + train[:, i] = train[:, i] + + samples = train[21600:, 0:n-1] + labels = train[21600:, n-1] # the last colummn is label + ############################# + # # -- choose variable for uni-variate GAN-AD -- # + # # samples = samples[:, [1, 8, 18, 28]] + ########################################### + ########################################### + nn = samples.shape[1] + num_samples = (samples.shape[0]-seq_length)//seq_step + aa = np.empty([num_samples, nn, nn]) + AA = np.empty([seq_length, nn]) + bb = np.empty([num_samples, seq_length, 1]) + + print('Pre-process training data...') + for j in range(num_samples): + # display batch progress + model_bigan.display_batch_progression(j, num_samples) + bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1]) + for i in range(nn): + AA[:, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] + aa[j, :, :] = np.cov(AA.T) + + samples = aa + labels = bb + + return samples, labels + +def swat_test(seq_length, seq_step, num_signals, randomize=False): + """ Load and serialise """ + # test = np.load('./data/swat_a.npy') + # print('Loaded swat_a from .npy') + test = np.loadtxt(open('./data/swat_a.csv'), delimiter=',') + print('Loaded swat_a from .csv') + m, n = test.shape # m1=449919, n1=52 + for i in range(n - 1): + B = max(test[:, i]) + if B != 0: + test[:, i] /= max(test[:, i]) + # scale from -1 to 1 + test[:, i] = 2 * test[:, i] - 1 + else: + test[:, i] = test[:, i] + + samples = test[:, 0:n - 1] + labels = test[:, n - 1] + idx = np.asarray(list(range(0, m))) # record the idx of each point + ############################# + # -- choose variable for uni-variate GAN-AD -- # + # samples = samples[:, [1,2,3,4]] + # samples_a = samples_a[:, [1,2,3,4]] + ############################ + ############################ + # -- apply PCA dimension reduction for multi-variate GAN-AD -- # + from sklearn.decomposition import PCA + import DR_discriminator as dr + # ALL SENSORS IDX + # XS = [0, 1, 5, 6, 7, 8, 16, 17, 18, 25, 26, 27, 28, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 45, 46, 47] + # X_n = samples[:, XS] + # X_a = samples_a[:, XS] + # All VARIABLES + X_a = samples + #################################### + ################################### + # -- the best PC dimension is chosen pc=5 -- # + n_components = num_signals + pca_a = PCA(n_components, svd_solver='full') + pca_a.fit(X_a) + pc_a = pca_a.components_ + # projected values on the principal component + T_a = np.matmul(X_a, pc_a.transpose(1, 0)) + + samples = T_a + # # only for one-dimensional + # samples = T_a.reshape([samples.shape[0], ]) + ########################################### + ########################################### + num_samples_t = (samples.shape[0] - seq_length) // seq_step + aa = np.empty([num_samples_t, seq_length, num_signals]) + bb = np.empty([num_samples_t, seq_length, 1]) + bbb = np.empty([num_samples_t, seq_length, 1]) + + for j in range(num_samples_t): + bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) + bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) + for i in range(num_signals): + aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] + + samples = aa + labels = bb + index = bbb + + return samples, labels, index + + +def swat_birgan_test(seq_length, seq_step, num_signals, randomize=False): + """ Load and serialise """ + # train = np.load('./data/swat.npy') + # print('Loaded swat from .npy') + test = 
np.loadtxt(open('./data/swat_a.csv'), delimiter=',') + print('Loaded swat_a from .csv') + m, n = test.shape # m1=449919, n1=52 + for i in range(n - 1): + B = max(test[:, i]) + if B != 0: + test[:, i] /= max(test[:, i]) + # scale from -1 to 1 + test[:, i] = 2 * test[:, i] - 1 + else: + test[:, i] = test[:, i] + + samples = test[:, 0:n - 1] + labels = test[:, n - 1] + # idx = np.asarray(list(range(0, m))) # record the idx of each point + ############################# + # # -- choose variable for uni-variate GAN-AD -- # + # # samples = samples[:, [1, 8, 18, 28]] + ########################################### + ########################################### + nn = samples.shape[1] + num_samples = (samples.shape[0]-seq_length)//seq_step + aa = np.empty([num_samples, nn, nn]) + AA = np.empty([seq_length, nn]) + bb = np.empty([num_samples, seq_length, 1]) + + print('Pre-process testing data...') + for j in range(num_samples): + # display batch progress + model_bigan.display_batch_progression(j, num_samples) + bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1,1]) + for i in range(nn): + AA[:, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] + aa[j, :, :] = np.cov(AA.T) + + samples = aa + labels = bb + + return samples, labels + + +def wadi(seq_length, seq_step, num_signals, randomize=False): + train = np.load('./data/wadi.npy') + print('Loaded wadi from .npy') + m, n = train.shape # m=1048571, n=119 + for i in range(n-1): + A = max(train[:, i]) + if A != 0: + train[:, i] /= max(train[:, i]) + # scale from -1 to 1 + train[:, i] = 2 * train[:, i] - 1 + else: + train[:, i] = train[:, i] + + samples = train[259200:, 0:n-1] # normal + labels = train[259200:, n-1] + ############################# + samples = samples[:, [0, 3, 6, 17]] + # samples = samples[:, 0] + ############################ + # # -- apply PCA dimension reduction for multi-variate GAN-AD -- # + # from sklearn.decomposition import PCA + # import DR_discriminator as dr + # X_n = samples + # #################################### + # ################################### + # # -- the best PC dimension is chosen pc=8 -- # + # n_components = num_signals + # pca = PCA(n_components, svd_solver='full') + # pca.fit(X_n) + # pc = pca.components_ + # # projected values on the principal component + # T_n = np.matmul(X_n, pc.transpose(1, 0)) + # + # samples = T_n + # # # only for one-dimensional + # # samples = T_n.reshape([samples.shape[0], ]) + ########################################### + ########################################### + seq_length = 10800 + num_samples = (samples.shape[0] - seq_length) // seq_step + print("num_samples:", num_samples) + print("num_signals:", num_signals) + aa = np.empty([num_samples, seq_length, num_signals]) + bb = np.empty([num_samples, seq_length, 1]) + + for j in range(num_samples): + bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) + # aa[j, :, :] = np.reshape(samples[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) + for i in range(num_signals): + aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] + + samples = aa[:, 0:10800:300, :] + labels = bb[:, 0:10800:300, :] + + return samples, labels + + +def wadi_test(seq_length, seq_step, num_signals, randomize=False): + test = np.load('./data/wadi_a.npy') + print('Loaded wadi_a from .npy') + m, n = test.shape # m1=172801, n1=119 + + for i in range(n - 1): + B = max(test[:, i]) + if B != 0: + test[:, i] /= max(test[:, i]) + # scale from -1 to 1 + test[:, i] = 2 * test[:, i] - 
1 + else: + test[:, i] = test[:, i] + + samples = test[:, 0:n - 1] + labels = test[:, n - 1] + idx = np.asarray(list(range(0, m))) # record the idx of each point + ############################# + ############################ + # -- apply PCA dimension reduction for multi-variate GAN-AD -- # + from sklearn.decomposition import PCA + import DR_discriminator as dr + X_a = samples + #################################### + ################################### + # -- the best PC dimension is chosen pc=8 -- # + n_components = num_signals + pca_a = PCA(n_components, svd_solver='full') + pca_a.fit(X_a) + pc_a = pca_a.components_ + # projected values on the principal component + T_a = np.matmul(X_a, pc_a.transpose(1, 0)) + + samples = T_a + # # only for one-dimensional + # samples = T_a.reshape([samples.shape[0], ]) + ########################################### + ########################################### + num_samples_t = (samples.shape[0] - seq_length) // seq_step + aa = np.empty([num_samples_t, seq_length, num_signals]) + bb = np.empty([num_samples_t, seq_length, 1]) + bbb = np.empty([num_samples_t, seq_length, 1]) + + for j in range(num_samples_t): + bb[j, :, :] = np.reshape(labels[(j * 10):(j * seq_step + seq_length)], [-1, 1]) + bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) + for i in range(num_signals): + aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] + + samples = aa + labels = bb + index = bbb + + return samples, labels, index + +def kdd99(seq_length, seq_step, num_signals): + train = np.load('./data/kdd99_train.npy') + print('load kdd99_train from .npy') + m, n = train.shape # m=562387, n=35 + # normalization + for i in range(n - 1): + # print('i=', i) + A = max(train[:, i]) + # print('A=', A) + if A != 0: + train[:, i] /= max(train[:, i]) + # scale from -1 to 1 + train[:, i] = 2 * train[:, i] - 1 + else: + train[:, i] = train[:, i] + + samples = train[:, 0:n - 1] + labels = train[:, n - 1] # the last colummn is label + ############################# + ############################ + # -- apply PCA dimension reduction for multi-variate GAN-AD -- # + from sklearn.decomposition import PCA + X_n = samples + #################################### + ################################### + # -- the best PC dimension is chosen pc=6 -- # + n_components = num_signals + pca = PCA(n_components, svd_solver='full') + pca.fit(X_n) + ex_var = pca.explained_variance_ratio_ + pc = pca.components_ + # projected values on the principal component + T_n = np.matmul(X_n, pc.transpose(1, 0)) + samples = T_n + # # only for one-dimensional + # samples = T_n.reshape([samples.shape[0], ]) + ########################################### + ########################################### + num_samples = (samples.shape[0] - seq_length) // seq_step + aa = np.empty([num_samples, seq_length, num_signals]) + bb = np.empty([num_samples, seq_length, 1]) + + for j in range(num_samples): + bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) + for i in range(num_signals): + aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] + + samples = aa + labels = bb + + return samples, labels + +def kdd99_test(seq_length, seq_step, num_signals): + test = np.load('./data/kdd99_test.npy') + print('load kdd99_test from .npy') + + m, n = test.shape # m1=494021, n1=35 + + for i in range(n - 1): + B = max(test[:, i]) + if B != 0: + test[:, i] /= max(test[:, i]) + # scale from -1 to 1 + test[:, i] = 2 * test[:, i] - 1 + else: + test[:, i] = test[:, 
i] + + samples = test[:, 0:n - 1] + labels = test[:, n - 1] + idx = np.asarray(list(range(0, m))) # record the idx of each point + ############################# + ############################ + # -- apply PCA dimension reduction for multi-variate GAN-AD -- # + from sklearn.decomposition import PCA + import DR_discriminator as dr + X_a = samples + #################################### + ################################### + # -- the best PC dimension is chosen pc=6 -- # + n_components = num_signals + pca_a = PCA(n_components, svd_solver='full') + pca_a.fit(X_a) + pc_a = pca_a.components_ + # projected values on the principal component + T_a = np.matmul(X_a, pc_a.transpose(1, 0)) + samples = T_a + # # only for one-dimensional + # samples = T_a.reshape([samples.shape[0], ]) + ########################################### + ########################################### + num_samples_t = (samples.shape[0] - seq_length) // seq_step + aa = np.empty([num_samples_t, seq_length, num_signals]) + bb = np.empty([num_samples_t, seq_length, 1]) + bbb = np.empty([num_samples_t, seq_length, 1]) + + for j in range(num_samples_t): + bb[j, :, :] = np.reshape(labels[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) + bbb[j, :, :] = np.reshape(idx[(j * seq_step):(j * seq_step + seq_length)], [-1, 1]) + for i in range(num_signals): + aa[j, :, i] = samples[(j * seq_step):(j * seq_step + seq_length), i] + + samples = aa + labels = bb + index = bbb + + return samples, labels, index + + +# ############################ data pre-processing ################################# +# --- to do with loading --- # +# --- to do with loading --- # +def get_samples_and_labels(settings): + """ + Parse settings options to load or generate correct type of data, + perform test/train split as necessary, and reform into 'samples' and 'labels' + dictionaries. 
+ """ + if settings['data_load_from']: + data_path = './experiments/data/' + settings['data_load_from'] + '.data.npy' + print('Loading data from', data_path) + samples, pdf, labels = get_data('load', data_path) + train, vali, test = samples['train'], samples['vali'], samples['test'] + train_labels, vali_labels, test_labels = labels['train'], labels['vali'], labels['test'] + del samples, labels + else: + # generate the data + data_vars = ['num_samples', 'num_samples_t','seq_length', 'seq_step', 'num_signals', 'freq_low', + 'freq_high', 'amplitude_low', 'amplitude_high', 'scale', 'full_mnist'] + data_settings = dict((k, settings[k]) for k in data_vars if k in settings.keys()) + samples, pdf, labels = get_data(settings['data'], settings['seq_length'], settings['seq_step'], settings['num_signals'], settings['sub_id']) + if 'multivariate_mnist' in settings and settings['multivariate_mnist']: + seq_length = samples.shape[1] + samples = samples.reshape(-1, int(np.sqrt(seq_length)), int(np.sqrt(seq_length))) + if 'normalise' in settings and settings['normalise']: # TODO this is a mess, fix + print(settings['normalise']) + norm = True + else: + norm = False + if labels is None: + train, vali, test = split(samples, [0.6, 0.2, 0.2], normalise=norm) + train_labels, vali_labels, test_labels = None, None, None + else: + train, vali, test, labels_list = split(samples, [0.6, 0.2, 0.2], normalise=norm, labels=labels) + train_labels, vali_labels, test_labels = labels_list + + labels = dict() + labels['train'], labels['vali'], labels['test'] = train_labels, vali_labels, test_labels + + samples = dict() + samples['train'], samples['vali'], samples['test'] = train, vali, test + + # futz around with labels + # TODO refactor cause this is messy + if 'one_hot' in settings and settings['one_hot'] and not settings['data_load_from']: + if len(labels['train'].shape) == 1: + # ASSUME labels go from 0 to max_val inclusive, find max-val + max_val = int(np.max([labels['train'].max(), labels['test'].max(), labels['vali'].max()])) + # now we have max_val + 1 dimensions + print('Setting cond_dim to', max_val + 1, 'from', settings['cond_dim']) + settings['cond_dim'] = max_val + 1 + print('Setting max_val to 1 from', settings['max_val']) + settings['max_val'] = 1 + + labels_oh = dict() + for (k, v) in labels.items(): + A = np.zeros(shape=(len(v), settings['cond_dim'])) + A[np.arange(len(v)), (v).astype(int)] = 1 + labels_oh[k] = A + labels = labels_oh + else: + assert settings['max_val'] == 1 + # this is already one-hot! + + if 'predict_labels' in settings and settings['predict_labels']: + samples, labels = data_utils.make_predict_labels(samples, labels) + print('Setting cond_dim to 0 from', settings['cond_dim']) + settings['cond_dim'] = 0 + + # update the settings dictionary to update erroneous settings + # (mostly about the sequence length etc. - it gets set by the data!) + settings['seq_length'] = samples['train'].shape[1] + settings['num_samples'] = samples['train'].shape[0] + samples['vali'].shape[0] + samples['test'].shape[0] + settings['num_signals'] = samples['train'].shape[2] + + return samples, pdf, labels + + +def get_data(data_type, seq_length, seq_step, num_signals, sub_id, eval_single, eval_an, data_options=None): + """ + Helper/wrapper function to get the requested data. 
+ """ + print('data_type') + labels = None + index = None + if data_type == 'load': + data_dict = np.load(data_options).item() + samples = data_dict['samples'] + pdf = data_dict['pdf'] + labels = data_dict['labels'] + elif data_type == 'swat': + samples, labels = swat(seq_length, seq_step, num_signals) + elif data_type == 'swat_test': + samples, labels, index = swat_test(seq_length, seq_step, num_signals) + elif data_type == 'kdd99': + samples, labels = kdd99(seq_length, seq_step, num_signals) + elif data_type == 'kdd99_test': + samples, labels, index = kdd99_test(seq_length, seq_step, num_signals) + elif data_type == 'wadi': + samples, labels = wadi(seq_length, seq_step, num_signals) + elif data_type == 'wadi_test': + samples, labels, index = wadi_test(seq_length, seq_step, num_signals) + else: + raise ValueError(data_type) + print('Generated/loaded', len(samples), 'samples from data-type', data_type) + return samples, labels, index + + +def get_batch(samples, batch_size, batch_idx, labels=None): + start_pos = batch_idx * batch_size + end_pos = start_pos + batch_size + if labels is None: + return samples[start_pos:end_pos], None + else: + if type(labels) == tuple: # two sets of labels + assert len(labels) == 2 + return samples[start_pos:end_pos], labels[0][start_pos:end_pos], labels[1][start_pos:end_pos] + else: + assert type(labels) == np.ndarray + return samples[start_pos:end_pos], labels[start_pos:end_pos] + + + +def split(samples, proportions, normalise=False, scale=False, labels=None, random_seed=None): + """ + Return train/validation/test split. + """ + if random_seed != None: + random.seed(random_seed) + np.random.seed(random_seed) + assert np.sum(proportions) == 1 + n_total = samples.shape[0] + n_train = ceil(n_total * proportions[0]) + n_test = ceil(n_total * proportions[2]) + n_vali = n_total - (n_train + n_test) + # permutation to shuffle the samples + shuff = np.random.permutation(n_total) + train_indices = shuff[:n_train] + vali_indices = shuff[n_train:(n_train + n_vali)] + test_indices = shuff[(n_train + n_vali):] + # TODO when we want to scale we can just return the indices + assert len(set(train_indices).intersection(vali_indices)) == 0 + assert len(set(train_indices).intersection(test_indices)) == 0 + assert len(set(vali_indices).intersection(test_indices)) == 0 + # split up the samples + train = samples[train_indices] + vali = samples[vali_indices] + test = samples[test_indices] + # apply the same normalisation scheme to all parts of the split + if normalise: + if scale: raise ValueError(normalise, scale) # mutually exclusive + train, vali, test = normalise_data(train, vali, test) + elif scale: + train, vali, test = scale_data(train, vali, test) + if labels is None: + return train, vali, test + else: + print('Splitting labels...') + if type(labels) == np.ndarray: + train_labels = labels[train_indices] + vali_labels = labels[vali_indices] + test_labels = labels[test_indices] + labels_split = [train_labels, vali_labels, test_labels] + elif type(labels) == dict: + # more than one set of labels! 
(weird case) + labels_split = dict() + for (label_name, label_set) in labels.items(): + train_labels = label_set[train_indices] + vali_labels = label_set[vali_indices] + test_labels = label_set[test_indices] + labels_split[label_name] = [train_labels, vali_labels, test_labels] + else: + raise ValueError(type(labels)) + return train, vali, test, labels_split diff --git a/eugenium_mmd.py b/eugenium_mmd.py new file mode 100644 index 0000000..30dbd75 --- /dev/null +++ b/eugenium_mmd.py @@ -0,0 +1,240 @@ +''' +Code taken from: https://github.com/eugenium/mmd +(modified slightly for efficiency/PEP by Stephanie Hyland) + +Python implementation of MMD and Covariance estimates for Relative MMD + +Some code is based on code from Vincent Van Asch +which is based on matlab code from Arthur Gretton + + +Eugene Belilovsky +eugene.belilovsky@inria.fr +''' +import numpy as np +import scipy as sp +from numpy import sqrt +from sklearn.metrics.pairwise import rbf_kernel +from functools import partial +import pdb + +def my_kernel(X, Y, sigma): + gamma = 1 / (2 * sigma**2) + if len(X.shape) == 2: + X_sqnorms = np.einsum('...i,...i', X, X) + Y_sqnorms = np.einsum('...i,...i', Y, Y) + XY = np.einsum('ia,ja', X, Y) + elif len(X.shape) == 3: + X_sqnorms = np.einsum('...ij,...ij', X, X) + Y_sqnorms = np.einsum('...ij,...ij', Y, Y) + XY = np.einsum('iab,jab', X, Y) + else: + pdb.set_trace() + Kxy = np.exp(-gamma*(X_sqnorms.reshape(-1, 1) - 2*XY + Y_sqnorms.reshape(1, -1))) + return Kxy + +def MMD_3_Sample_Test(X, Y, Z, sigma=-1, SelectSigma=True, computeMMDs=False): + '''Performs the relative MMD test which returns a test statistic for whether Y is closer to X or than Z. + See http://arxiv.org/pdf/1511.04581.pdf + The bandwith heuristic is based on the median heuristic (see Smola,Gretton). + ''' + if(sigma<0): + #Similar heuristics + if SelectSigma: + siz=np.min((1000, X.shape[0])) + sigma1=kernelwidthPair(X[0:siz], Y[0:siz]); + sigma2=kernelwidthPair(X[0:siz], Z[0:siz]); + sigma=(sigma1+sigma2)/2. 
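+            # With SelectSigma, the bandwidth is the average of the two pairwise
+            # median-heuristic widths for (X, Y) and (X, Z); the else-branch below
+            # instead estimates a single width from a pooled subsample of X, Y and Z.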
+ else: + siz=np.min((1000, X.shape[0]*3)) + Zem=np.r_[X[0:siz/3], Y[0:siz/3], Z[0:siz/3]] + sigma=kernelwidth(Zem); + + #kernel = partial(rbf_kernel, gamma=1.0/(sigma**2)) + kernel = partial(my_kernel, sigma=sigma) + #kernel = partial(grbf, sigma=sigma) + + Kyy = kernel(Y, Y) + Kzz = kernel(Z, Z) + Kxy = kernel(X, Y) + Kxz = kernel(X, Z) + + Kyynd = Kyy-np.diag(np.diagonal(Kyy)) + Kzznd = Kzz-np.diag(np.diagonal(Kzz)) + m = Kxy.shape[0]; + n = Kyy.shape[0]; + r = Kzz.shape[0]; + + + u_yy=np.sum(Kyynd)*( 1./(n*(n-1)) ) + u_zz=np.sum(Kzznd)*( 1./(r*(r-1)) ) + u_xy=np.sum(Kxy)/(m*n) + u_xz=np.sum(Kxz)/(m*r) + #Compute the test statistic + t=u_yy - 2.*u_xy - (u_zz-2.*u_xz) + Diff_Var, Diff_Var_z2, data=MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz) + + pvalue=sp.stats.norm.cdf(-t/np.sqrt((Diff_Var))) + # pvalue_z2=sp.stats.norm.cdf(-t/np.sqrt((Diff_Var_z2))) + tstat=t/sqrt(Diff_Var) + + if(computeMMDs): + Kxx = kernel(X, X) + Kxxnd = Kxx-np.diag(np.diagonal(Kxx)) + u_xx=np.sum(Kxxnd)*( 1./(m*(m-1)) ) + MMDXY=u_xx+u_yy-2.*u_xy + MMDXZ=u_xx+u_zz-2.*u_xz + else: + MMDXY=None + MMDXZ=None + return pvalue, tstat, sigma, MMDXY, MMDXZ + +def MMD_Diff_Var(Kyy, Kzz, Kxy, Kxz): + ''' + Compute the variance of the difference statistic MMDXY-MMDXZ + See http://arxiv.org/pdf/1511.04581.pdf Appendix for derivations + ''' + m = Kxy.shape[0]; + n = Kyy.shape[0]; + r = Kzz.shape[0]; + + + Kyynd = Kyy-np.diag(np.diagonal(Kyy)); + Kzznd = Kzz-np.diag(np.diagonal(Kzz)); + + u_yy=np.sum(Kyynd)*( 1./(n*(n-1)) ); + u_zz=np.sum(Kzznd)*( 1./(r*(r-1)) ); + u_xy=np.sum(Kxy)/(m*n); + u_xz=np.sum(Kxz)/(m*r); + + #compute zeta1 + t1=(1./n**3)*np.sum(Kyynd.T.dot(Kyynd))-u_yy**2; + t2=(1./(n**2*m))*np.sum(Kxy.T.dot(Kxy))-u_xy**2; + t3=(1./(n*m**2))*np.sum(Kxy.dot(Kxy.T))-u_xy**2; + t4=(1./r**3)*np.sum(Kzznd.T.dot(Kzznd))-u_zz**2; + t5=(1./(r*m**2))*np.sum(Kxz.dot(Kxz.T))-u_xz**2; + t6=(1./(r**2*m))*np.sum(Kxz.T.dot(Kxz))-u_xz**2; + t7=(1./(n**2*m))*np.sum(Kyynd.dot(Kxy.T))-u_yy*u_xy; + t8=(1./(n*m*r))*np.sum(Kxy.T.dot(Kxz))-u_xz*u_xy; + t9=(1./(r**2*m))*np.sum(Kzznd.dot(Kxz.T))-u_zz*u_xz; + + zeta1=(t1+t2+t3+t4+t5+t6-2.*(t7+t8+t9)); + + zeta2=(1/m/(m-1))*np.sum((Kyynd-Kzznd-Kxy.T-Kxy+Kxz+Kxz.T)**2)-(u_yy - 2.*u_xy - (u_zz-2.*u_xz))**2; + + + data=dict({'t1':t1, + 't2':t2, + 't3':t3, + 't4':t4, + 't5':t5, + 't6':t6, + 't7':t7, + 't8':t8, + 't9':t9, + 'zeta1':zeta1, + 'zeta2':zeta2, + }) + #TODO more precise version for zeta2 + # xx=(1/m^2)*sum(sum(Kxxnd.*Kxxnd))-u_xx^2; + # yy=(1/n^2)*sum(sum(Kyynd.*Kyynd))-u_yy^2; + #xy=(1/(n*m))*sum(sum(Kxy.*Kxy))-u_xy^2; + #xxy=(1/(n*m^2))*sum(sum(Kxxnd*Kxy))-u_xx*u_xy; + #yyx=(1/(n^2*m))*sum(sum(Kyynd*Kxy'))-u_yy*u_xy; + #zeta2=(xx+yy+xy+xy-2*(xxy+xxy +yyx+yyx)) + + + Var=(4.*(m-2)/(m*(m-1)))*zeta1; + Var_z2=Var+(2./(m*(m-1)))*zeta2; + + return Var, Var_z2, data +def grbf(x1, x2, sigma): + '''Calculates the Gaussian radial base function kernel''' + n, nfeatures = x1.shape + m, mfeatures = x2.shape + + k1 = np.sum((x1*x1), 1) + q = np.tile(k1, (m, 1)).transpose() + del k1 + + k2 = np.sum((x2*x2), 1) + r = np.tile(k2.T, (n, 1)) + del k2 + + h = q + r + del q, r + + # The norm + h = h - 2*np.dot(x1, x2.transpose()) + h = np.array(h, dtype=float) + + return np.exp(-1.*h/(2.*pow(sigma, 2))) + + +def kernelwidthPair(x1, x2): + '''Implementation of the median heuristic. See Gretton 2012 + Pick sigma such that the exponent of exp(- ||x-y|| / (2*sigma2)), + in other words ||x-y|| / (2*sigma2), equals 1 for the median distance x + and y of all distances between points from both data sets X and Y. 
+ ''' + n, nfeatures = x1.shape + m, mfeatures = x2.shape + + k1 = np.sum((x1*x1), 1) + q = np.tile(k1, (m, 1)).transpose() + del k1 + + k2 = np.sum((x2*x2), 1) + r = np.tile(k2, (n, 1)) + del k2 + + h= q + r + del q, r + + # The norm + h = h - 2*np.dot(x1, x2.transpose()) + h = np.array(h, dtype=float) + + mdist = np.median([i for i in h.flat if i]) + + sigma = sqrt(mdist/2.0) + if not sigma: sigma = 1 + + return sigma +def kernelwidth(Zmed): + '''Alternative median heuristic when we cant partition the points + ''' + m= Zmed.shape[0] + k1 = np.expand_dims(np.sum((Zmed*Zmed), axis=1), 1) + q = np.kron(np.ones((1, m)), k1) + r = np.kron(np.ones((m, 1)), k1.T) + del k1 + + h= q + r + del q, r + + # The norm + h = h - 2.*Zmed.dot(Zmed.T) + h = np.array(h, dtype=float) + + mdist = np.median([i for i in h.flat if i]) + + sigma = sqrt(mdist/2.0) + if not sigma: sigma = 1 + + return sigma + + + +def MMD_unbiased(Kxx, Kyy, Kxy): +#The estimate when distribution of x is not equal to y + m = Kxx.shape[0] + n = Kyy.shape[0] + + t1 = (1./(m*(m-1)))*np.sum(Kxx - np.diag(np.diagonal(Kxx))) + t2 = (2./(m*n)) * np.sum(Kxy) + t3 = (1./(n*(n-1)))* np.sum(Kyy - np.diag(np.diagonal(Kyy))) + + MMDsquared = (t1-t2+t3) + + return MMDsquared diff --git a/eval.py b/eval.py new file mode 100644 index 0000000..be41e7d --- /dev/null +++ b/eval.py @@ -0,0 +1,728 @@ +#!/usr/bin/env ipython +# Evaluation of models +# + +import json +import pdb +import numpy as np +import pandas as pd +from eugenium_mmd import MMD_3_Sample_Test +from scipy.stats import ks_2samp +import mmd +from sklearn.svm import SVC +from sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score, roc_auc_score, average_precision_score +from sklearn.ensemble import RandomForestClassifier +import sklearn +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +# for keras +import keras +from keras.models import Sequential +from keras.layers import Dense, Dropout, Flatten +from keras.layers import Conv2D, MaxPooling2D +from keras.backend import clear_session + +import model +import data_utils +import plotting + +import pickle + +def assert_same_data(A, B): + # case 0, both loaded + if A['data'] == 'load' and B['data'] == 'load': + assert A['data_load_from'] == B['data_load_from'] + data_path = './experiments/data/' + A['data_load_from'] + elif A['data'] == 'load' and (not B['data'] == 'load'): + assert A['data_load_from'] == B['identifier'] + data_path = './experiments/data/' + A['data_load_from'] + elif (not A['data'] == 'load') and B['data'] == 'load': + assert B['data_load_from'] == A['identifier'] + data_path = './experiments/data/' + A['identifier'] + else: + raise ValueError(A['data'], B['data']) + return data_path + +def model_memorisation(identifier, epoch, max_samples=2000, tstr=False): + """ + Compare samples from a model against training set and validation set in mmd + """ + if tstr: + print('Loading data from TSTR experiment (not sampling from model)') + # load pre-generated samples + synth_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item() + model_samples = synth_data['samples'] + synth_labels = synth_data['labels'] + # load real data used in that experiment + real_data = np.load('./experiments/data/' + identifier + '.data.npy').item() + real_samples = real_data['samples'] + train = real_samples['train'] + test = real_samples['test'] + n_samples = test.shape[0] + if model_samples.shape[0] > n_samples: + model_samples = 
np.random.permutation(model_samples)[:n_samples] + print('Data loaded successfully!') + else: + if identifier == 'cristobal_eICU': + model_samples = pickle.load(open('REDACTED', 'rb')) + samples, labels = data_utils.eICU_task() + train = samples['train'].reshape(-1,16,4) + vali = samples['vali'].reshape(-1,16,4) + test = samples['test'].reshape(-1,16,4) + #train_targets = labels['train'] + #vali_targets = labels['vali'] + #test_targets = labels['test'] + train, vali, test = data_utils.scale_data(train, vali, test) + n_samples = test.shape[0] + if n_samples > max_samples: + n_samples = max_samples + test = np.random.permutation(test)[:n_samples] + if model_samples.shape[0] > n_samples: + model_samples = np.random.permutation(model_samples)[:n_samples] + elif identifier == 'cristobal_MNIST': + the_dir = 'REDACTED' + # pick a random one + which = np.random.choice(['NEW_OK_', '_r4', '_r5', '_r6', '_r7']) + model_samples, model_labels = pickle.load(open(the_dir + 'synth_mnist_minist_cdgan_1_2_100_multivar_14_nolr_rdim3_0_2_' + which + '_190.pk', 'rb')) + # get test and train... + # (generated with fixed seed...) + mnist_resized_dim = 14 + samples, labels = data_utils.load_resized_mnist(mnist_resized_dim) + proportions = [0.6, 0.2, 0.2] + train, vali, test, labels_split = data_utils.split(samples, labels=labels, random_seed=1, proportions=proportions) + np.random.seed() + train = train.reshape(-1, 14, 14) + test = test.reshape(-1, 14, 14) + vali = vali.reshape(-1, 14, 14) + n_samples = test.shape[0] + if n_samples > max_samples: + n_samples = max_samples + test = np.random.permutation(test)[:n_samples] + if model_samples.shape[0] > n_samples: + model_samples = np.random.permutation(model_samples)[:n_samples] + else: + settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) + # get the test, train sets + data = np.load('./experiments/data/' + identifier + '.data.npy').item() + train = data['samples']['train'] + test = data['samples']['test'] + n_samples = test.shape[0] + if n_samples > max_samples: + n_samples = max_samples + test = np.random.permutation(test)[:n_samples] + model_samples = model.sample_trained_model(settings, epoch, n_samples) + all_samples = np.vstack([train, test, model_samples]) + heuristic_sigma = mmd.median_pairwise_distance(all_samples) + print('heuristic sigma:', heuristic_sigma) + pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, test, np.random.permutation(train)[:n_samples], sigma=heuristic_sigma, computeMMDs=False) + #pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(model_samples, np.random.permutation(train)[:n_samples], test, sigma=heuristic_sigma, computeMMDs=False) +# if pvalue < 0.05: +# print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that the test data has a smaller MMD with the true data than the generated data') + # the function takes (X, Y, Z) as its first arguments, it's testing if MMDXY (i.e. 
MMD between model and train) is less than MMDXZ (MMd between model and test) +# else: +# print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclu#de that the test data has a smaller MMD with the true data than the generated data') + return pvalue, tstat, sigma + +def model_comparison(identifier_A, identifier_B, epoch_A=99, epoch_B=99): + """ + Compare two models using relative MMD test + """ + # make sure they used the same data + settings_A = json.load(open('./experiments/settings/' + identifier_A + '.txt', 'r')) + settings_B = json.load(open('./experiments/settings/' + identifier_B + '.txt', 'r')) + data_path = assert_same_data(settings_A, settings_B) + # now load the data + data = np.load(data_path + '.data.npy').item()['samples']['vali'] + n_samples = data.shape[0] + A_samples = model.sample_trained_model(settings_A, epoch_A, n_samples) + B_samples = model.sample_trained_model(settings_B, epoch_B, n_samples) + # do the comparison + # TODO: support multiple signals + ## some notes about this test: + ## MMD_3_Sample_Test(X, Y, Z) tests the hypothesis that Px is closer to Pz than Py + ## that is, test the null hypothesis H0: + ## MMD(F, Px, Py) <= MMD(F, Px, Pz) + ## versus the alternate hypothesis: + ## MMD(F, Px, Py) > MMD(F, Px, Pz) + ## at significance level that we select later (just the threshold on the p-value) + pvalue, tstat, sigma, MMDXY, MMDXZ = MMD_3_Sample_Test(data[:, :, 0], A_samples[:, :, 0], B_samples[:, :, 0], computeMMDs=True) + print(pvalue, tstat, sigma) + if pvalue < 0.05: + print('At confidence level 0.05, we reject the null hypothesis that MMDXY <= MMDXZ, and conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A) + else: + print('We have failed to reject the null hypothesis that MMDXY <= MMDXZ, and cannot conclude that', identifier_B, 'has a smaller MMD with the true data than', identifier_A) + return pvalue, tstat, sigma, MMDXY, MMDXZ + +# --- to do with reconstruction --- # + +def get_reconstruction_errors(identifier, epoch, g_tolerance=0.05, max_samples=1000, rerun=False, tstr=False): + """ + Get the reconstruction error of every point in the training set of a given + experiment. 
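+
+ Arguments as used below: g_tolerance is forwarded to model.invert (via
+ error_per_sample); max_samples caps how many train/test samples are inverted;
+ rerun=True ignores any cached .reconstruction_errors.npy file; tstr=True loads
+ pre-generated TSTR samples instead of sampling from the trained model.
+
+ Hedged usage sketch (assumes the corresponding settings/data files exist;
+ 'my_experiment' is a placeholder identifier):
+ get_reconstruction_errors('my_experiment', epoch=99, g_tolerance=0.05)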
+ """ + settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) + if settings['data_load_from']: + data_dict = np.load('./experiments/data/' + settings['data_load_from'] + '.data.npy').item() + else: + data_dict = np.load('./experiments/data/' + identifier + '.data.npy').item() + samples = data_dict['samples'] + train = samples['train'] + vali = samples['vali'] + test = samples['test'] + labels = data_dict['labels'] + train_labels, test_labels, synth_labels, vali_labels = None, None, None, None + try: + if rerun: + raise FileNotFoundError + errors = np.load('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy').item() + train_errors = errors['train'] + test_errors = errors['test'] + generated_errors = errors['generated'] + noisy_errors = errors['noisy'] + print('Loaded precomputed errors') + except FileNotFoundError: + if tstr: + synth_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item() + generated = synth_data['samples'] + synth_labels = synth_data['labels'] + train_labels = labels['train'] + test_labels = labels['test'] + vali_labels = labels['vali'] + else: + # generate new data + n_eval = 500 + # generate "easy" samples from the distribution + generated = model.sample_trained_model(settings, epoch, n_eval) + # generate "hard' random samples, not from train/test distribution + # TODO: use original validation examples, add noise etc. + ## random_samples = np.random.normal(size=generated.shape) + # random_samples -= np.mean(random_samples, axis=0) + # random_samples += np.mean(vali, axis=0) + # random_samples /= np.std(random_samples, axis=0) + # random_samples *= np.std(vali, axis=0) + + # get all the errors + print('Getting reconstruction errors on train set') + if train.shape[0] > max_samples: + index_subset = np.random.permutation(train.shape[0])[:max_samples] + train = train[index_subset] + if train_labels is not None: + train_labels = train_labels[index_subset] + train_errors = error_per_sample(identifier, epoch, train, n_rep=5, g_tolerance=g_tolerance, C_samples=train_labels) + print('Getting reconstruction errors on test set') + if test.shape[0] > max_samples: + index_subset = np.random.permutation(test.shape[0])[:max_samples] + test = test[index_subset] + if test_labels is not None: + test_labels = test_labels[index_subset] + test_errors = error_per_sample(identifier, epoch, test, n_rep=5, g_tolerance=g_tolerance, C_samples=test_labels) + D_test, p_test = ks_2samp(train_errors, test_errors) + print('KS statistic and p-value for train v. test erors:', D_test, p_test) + pdb.set_trace() + print('Getting reconstruction errors on generated set') + generated_errors = error_per_sample(identifier, epoch, generated, n_rep=5, g_tolerance=g_tolerance, C_samples=synth_labels) + D_gen, p_gen = ks_2samp(generated_errors, train_errors) + print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen) + D_gentest, p_gentest = ks_2samp(generated_errors, test_errors) + print('KS statistic and p-value for gen v. test erors:', D_gentest, p_gentest) +# print('Getting reconstruction errors on noisy set') +# alpha = 0.5 +# noisy_samples = alpha*vali + (1-alpha)*np.random.permutation(vali) +# noisy_errors = error_per_sample(identifier, epoch, noisy_samples, n_rep=5, g_tolerance=g_tolerance, C_samples=vali_labels) + noisy_errors = None + # save! 
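+ # Cache the per-sample errors so that later calls with rerun=False can load
+ # them from ./experiments/eval/ instead of repeating the inversion.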
+ errors = {'train': train_errors, 'test': test_errors, 'generated': generated_errors, 'noisy': noisy_errors} + np.save('./experiments/eval/' + identifier + '_' + str(epoch) + '_' + str(g_tolerance) + '.reconstruction_errors.npy', errors) + # do two-sample Kolomogorov-Smirnov test for equality + D_test, p_test = ks_2samp(train_errors, test_errors) + print('KS statistic and p-value for train v. test erors:', D_test, p_test) + D_gen, p_gen = ks_2samp(generated_errors, train_errors) + print('KS statistic and p-value for train v. gen erors:', D_gen, p_gen) + D_gentest, p_gentest = ks_2samp(generated_errors, test_errors) + print('KS statistic and p-value for gen v. test erors:', D_gentest, p_gentest) + # visualise distribution of errors for train and test + plotting.reconstruction_errors(identifier + '_' + str(epoch) + '_' + str(g_tolerance), train_errors, test_errors, generated_errors, noisy_errors) + # visualise the "hardest" and "easiest" samples from train + ranking_train = np.argsort(train_errors) + easiest_train = ranking_train[:6] + hardest_train = ranking_train[-6:] + plotting.save_plot_sample(train[easiest_train], epoch, identifier + '_easytrain', n_samples=6, num_epochs=None, ncol=2) + plotting.save_plot_sample(train[hardest_train], epoch, identifier + '_hardtrain', n_samples=6, num_epochs=None, ncol=2) + # visualise the "hardest" and "easiest" samples from random +# ranking_random = np.argsort(noisy_errors) +# easiest_random = ranking_random[:6] +# hardest_random = ranking_random[-6:] +# plotting.save_plot_sample(random_samples[easiest_random], epoch, identifier + '_easyrandom', n_samples=6, num_epochs=None, ncol=2) +# plotting.save_plot_sample(random_samples[hardest_random], epoch, identifier + '_hardrandom', n_samples=6, num_epochs=None, ncol=2) + return True + +def error_per_sample(identifier, epoch, samples, n_rep=3, n_iter=None, g_tolerance=0.025, use_min=True, C_samples=None): + """ + Get (average over a few runs) of the reconstruction error per sample + """ + n_samples = samples.shape[0] + heuristic_sigma = np.float32(mmd.median_pairwise_distance(samples)) + errors = np.zeros(shape=(n_samples, n_rep)) + for rep in range(n_rep): + Z, rep_errors, sigma = model.invert(identifier, epoch, samples, n_iter=n_iter, heuristic_sigma=heuristic_sigma, g_tolerance=g_tolerance, C_samples=C_samples) + errors[:, rep] = rep_errors + # return min, or average? 
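+ # errors has shape (n_samples, n_rep); reduce over the repetitions so that each
+ # input sample gets a single reconstruction error, e.g.
+ # [[0.3, 0.1], [0.5, 0.4]] -> min: [0.1, 0.4], mean: [0.2, 0.45].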
+ if use_min: + errors = np.min(errors, axis=1) + else: + # use mean + errors = np.mean(errors, axis=1) + return errors + +# --- visualisation evaluation --- # + +def view_digit(identifier, epoch, digit, n_samples=6): + """ + Generate a bunch of MNIST digits from a CGAN, view them + """ + settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) + if settings['one_hot']: + assert settings['max_val'] == 1 + assert digit <= settings['cond_dim'] + C_samples = np.zeros(shape=(n_samples, settings['cond_dim'])) + C_samples[:, digit] = 1 + else: + assert settings['cond_dim'] == 1 + assert digit <= settings['max_val'] + C_samples = np.array([digit]*n_samples).reshape(-1, 1) + digit_samples = model.sample_trained_model(settings, epoch, n_samples, Z_samples=None, cond_dim=settings['cond_dim'], C_samples=C_samples) + digit_samples = digit_samples.reshape(n_samples, -1, 1) + # visualise + plotting.save_mnist_plot_sample(digit_samples, digit, identifier + '_' + str(epoch) + '_digit_', n_samples) + return True + +def view_interpolation(identifier, epoch, n_steps=6, input_samples=None, e_tolerance=0.01, sigma=3.29286853021): + """ + If samples: generate interpolation between real points + Else: + Sample two points in the latent space, view a linear interpolation between them. + """ + settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) + if input_samples is None: + # grab two trainng examples + data = np.load('./experiments/data/' + identifier + '.data.npy').item() + train = data['samples']['train'] + input_samples = np.random.permutation(train)[:2] +# Z_sampleA, Z_sampleB = model.sample_Z(2, settings['seq_length'], settings['latent_dim'], +# settings['use_time']) + if sigma is None: + ## gotta get a sigma somehow + sigma = mmd.median_pairwise_distance(train) + print('Calcualted heuristic sigma from training data:', sigma) + Zs, error, _ = model.invert(settings, epoch, input_samples, e_tolerance=e_tolerance) + Z_sampleA, Z_sampleB = Zs + Z_samples = plotting.interpolate(Z_sampleA, Z_sampleB, n_steps=n_steps) + samples = model.sample_trained_model(settings, epoch, Z_samples.shape[0], Z_samples) + # get distances from generated samples to target samples + d_A, d_B = [], [] + for sample in samples: + d_A.append(sample_distance(sample, samples[0], sigma)) + d_B.append(sample_distance(sample, samples[-1], sigma)) + distances = pd.DataFrame({'dA': d_A, 'dB': d_B}) + plotting.save_plot_interpolate(input_samples, samples, epoch, settings['identifier'] + '_epoch' + str(epoch), distances=distances, sigma=sigma) + return True + +def view_latent_vary(identifier, epoch, n_steps=6): + settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) + Z_sample = model.sample_Z(1, settings['seq_length'], settings['latent_dim'], + settings['use_time'])[0] + samples_dim = [] + for dim in range(settings['latent_dim']): + Z_samples_dim = plotting.vary_latent_dimension(Z_sample, dim, n_steps) + samples_dim.append(model.sample_trained_model(settings, epoch, Z_samples_dim.shape[0], Z_samples_dim)) + plotting.save_plot_vary_dimension(samples_dim, epoch, settings['identifier'] + '_varydim', n_dim=settings['latent_dim']) + return True + +def view_reconstruction(identifier, epoch, real_samples, tolerance=1): + """ + Given a set of real samples, find the "closest" latent space points + corresponding to them, generate samples from these, visualise! 
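+ The recovered latent sequences of the first two samples are also visualised
+ (via plotting.visualise_latent) before the reconstructions themselves are plotted.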
+ """ + settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) + Zs, error, sigma = model.invert(settings, epoch, real_samples, tolerance=tolerance) + plotting.visualise_latent(Zs[0], identifier+'_' + str(epoch) + '_0') + plotting.visualise_latent(Zs[1], identifier+'_' + str(epoch) + '_1') + model_samples = model.sample_trained_model(settings, epoch, Zs.shape[0], Zs) + plotting.save_plot_reconstruct(real_samples, model_samples, settings['identifier']) + return True + +def view_fixed(identifier, epoch, n_samples=6, dim=None): + """ What happens when we give the same point at each time step? """ + settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) + Z_samples = model.sample_Z(n_samples, settings['seq_length'], settings['latent_dim'], + settings['use_time']) + # now, propagate forward the value at time 0 (which time doesn't matter) + for i in range(1, settings['seq_length']): + if dim is None: + Z_samples[:, i, :] = Z_samples[:, 0, :] + else: + Z_samples[:, i, dim] = Z_samples[:, 0, dim] + # now generate + samples = model.sample_trained_model(settings, epoch, n_samples, Z_samples) + # now visualise + plotting.save_plot_sample(samples, epoch, identifier + '_fixed', n_samples) + return True + +def view_params(identifier, epoch): + """ Visualise weight matrices in the GAN """ + settings = json.load(open('./experiments/settings/' + identifier + '.txt', 'r')) + parameters = model.load_parameters(identifier + '_' + str(epoch)) + plotting.plot_parameters(parameters, identifier + '_' + str(epoch)) + return True + +# --- to do with samples --- # + +def sample_distance(sampleA, sampleB, sigma): + """ + I know this isn't the best distance measure, alright. + """ + # RBF! + gamma = 1 / (2 * sigma**2) + similarity = np.exp(-gamma*(np.linalg.norm(sampleA - sampleB)**2)) + distance = 1 - similarity + return distance + +### --- TSTR ---- ### + +def train_CNN(train_X, train_Y, vali_X, vali_Y, test_X): + """ + Train a CNN (code copied/adapted from Cristobal's mnist_keras_trts_0_2) + (ONLY MNIST, ONLY 14x14) + (ONLY DIGITS UP TO 3) + """ + print('Training CNN!') + input_shape = (14,14,1) + batch_size = 128 + num_classes = 3 + epochs = 1000 + + m = Sequential() + m.add(Conv2D(16, kernel_size=(3, 3), + activation='relu', + input_shape=input_shape)) + m.add(Conv2D(32, (3, 3), activation='relu')) + m.add(MaxPooling2D(pool_size=(2, 2))) + m.add(Dropout(0.25)) + m.add(Flatten()) + m.add(Dense(128, activation='relu')) + m.add(Dropout(0.5)) + m.add(Dense(num_classes, activation='softmax')) + + m.compile(loss=keras.losses.categorical_crossentropy, + optimizer=keras.optimizers.Adadelta(), + metrics=['accuracy']) + + earlyStopping=keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=1, mode='auto') + m.fit(np.expand_dims(train_X, axis=-1), train_Y, + batch_size=batch_size, + epochs=epochs, + verbose=1, + validation_data=(np.expand_dims(vali_X, axis=-1), vali_Y), + callbacks=[earlyStopping]) + test_predictions = m.predict(np.expand_dims(test_X, axis=-1)) + return test_predictions + +def TSTR_mnist(identifier, epoch, generate=True, duplicate_synth=1, vali=True, CNN=False, reverse=False): + """ + Either load or generate synthetic training, real test data... 
+ Load synthetic training, real test data, do multi-class SVM + (basically just this: http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html) + + If reverse = True: do TRTS + """ + print('Running TSTR on', identifier, 'at epoch', epoch) + if vali: + test_set = 'vali' + else: + test_set = 'test' + if generate: + data = np.load('./experiments/data/' + identifier + '.data.npy').item() + samples = data['samples'] + train_X = samples['train'] + test_X = samples[test_set] + labels = data['labels'] + train_Y = labels['train'] + test_Y = labels[test_set] + # now sample from the model + synth_Y = np.tile(train_Y, [duplicate_synth, 1]) + synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y) + # for use in TRTS + synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y) + synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y} + np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data) + else: + print('Loading synthetic data from pre-sampled model') + exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item() + test_X, test_Y = exp_data['test_data'], exp_data['test_labels'] + train_X, train_Y = exp_data['train_data'], exp_data['train_labels'] + synth_X, synth_Y = exp_data['synth_data'], exp_data['synth_labels'] + if reverse: + which_setting = 'trts' + print('Swapping synthetic test set in for real, to do TRTS!') + test_X = synth_testX + else: + print('Doing normal TSTR') + which_setting = 'tstr' + # make classifier + if not CNN: + model_choice = 'RF' + # if multivariate, reshape + if len(test_X.shape) == 3: + test_X = test_X.reshape(test_X.shape[0], -1) + if len(train_X.shape) == 3: + train_X = train_X.reshape(train_X.shape[0], -1) + if len(synth_X.shape) == 3: + synth_X = synth_X.reshape(synth_X.shape[0], -1) + # if one hot, fix + if len(synth_Y.shape) > 1 and not synth_Y.shape[1] == 1: + synth_Y = np.argmax(synth_Y, axis=1) + train_Y = np.argmax(train_Y, axis=1) + test_Y = np.argmax(test_Y, axis=1) + # random forest + #synth_classifier = SVC(gamma=0.001) + #real_classifier = SVC(gamma=0.001) + synth_classifier = RandomForestClassifier(n_estimators=500) + real_classifier = RandomForestClassifier(n_estimators=500) + # fit + real_classifier.fit(train_X, train_Y) + synth_classifier.fit(synth_X, synth_Y) + # test on real + synth_predY = synth_classifier.predict(test_X) + real_predY = real_classifier.predict(test_X) + else: + model_choice = 'CNN' + synth_predY = train_CNN(synth_X, synth_Y, samples['vali'], labels['vali'], test_X) + clear_session() + real_predY = train_CNN(train_X, train_Y, samples['vali'], labels['vali'], test_X) + clear_session() + # CNN setting is all 'one-hot' + test_Y = np.argmax(test_Y, axis=1) + synth_predY = np.argmax(synth_predY, axis=1) + real_predY = np.argmax(real_predY, axis=1) + + # report on results + synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y, synth_predY, average='weighted') + synth_accuracy = accuracy_score(test_Y, synth_predY) + synth_auprc = 'NaN' + synth_auroc = 'NaN' + synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc] + real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y, real_predY, average='weighted') + real_accuracy = accuracy_score(test_Y, real_predY) + real_auprc = 'NaN' + real_auroc = 
'NaN' + real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc] + + all_scores = synth_scores + real_scores + + if vali: + report_file = open('./experiments/tstr/vali.' + which_setting + '_report.v3.csv', 'a') + report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n') + report_file.close() + else: + report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a') + report_file.write('mnist,' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n') + report_file.close() + # visualise results + try: + plotting.view_mnist_eval(identifier + '_' + str(epoch), train_X, train_Y, synth_X, synth_Y, test_X, test_Y, synth_predY, real_predY) + except ValueError: + print('PLOTTING ERROR') + pdb.set_trace() + print(classification_report(test_Y, synth_predY)) + print(classification_report(test_Y, real_predY)) + return synth_f1, real_f1 + +def TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do_OR=False, duplicate_synth=1, reverse=False): + """ + """ + if vali: + test_set = 'vali' + else: + test_set = 'test' + data = np.load('./experiments/data/' + identifier + '.data.npy').item() + samples = data['samples'] + train_X = samples['train'] + test_X = samples[test_set] + labels = data['labels'] + train_Y = labels['train'] + test_Y = labels[test_set] + if generate: + # now sample from the model + synth_Y = np.tile(train_Y, [duplicate_synth, 1]) + synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y) + # for use in TRTS + synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y) + synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y} + np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data) + else: + print('Loading pre-generated data') + print('WARNING: not implemented for TRTS') + # get "train" data + exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item() + synth_X = exp_data['samples'] + synth_Y = exp_data['labels'] + n_synth = synth_X.shape[0] + synth_X = synth_X.reshape(n_synth, -1) + # pdb.set_trace() + # # ALERT ALERT MODIFYING + # synth_X = 2*(synth_X > 0) - 1 + orig_data = np.load('/cluster/home/hyland/eICU_task_data.npy').item() + if reverse: + which_setting = 'trts' + # visualise distribution of errors for train and test + print('Swapping synthetic test set in for real, to do TRTS!') + test_X = synth_testX + else: + print('Doing normal TSTR') + which_setting = 'tstr' +# # get test data +# test_X = data['test_X'] +# test_Y = data['test_Y'] + if not CNN: + model_choice = 'RF' + # if multivariate, reshape + if len(test_X.shape) == 3: + test_X = test_X.reshape(test_X.shape[0], -1) + if len(train_X.shape) == 3: + train_X = train_X.reshape(train_X.shape[0], -1) + if len(synth_X.shape) == 3: + synth_X = synth_X.reshape(synth_X.shape[0], -1) + else: + raise ValueError(CNN) + model_choice = 'CNN' + # we will select the best validation set epoch based on F1 score, take average across all the tasks + score_list = [] + for label in range(synth_Y.shape[1]): + task = orig_data['Y_columns'][label] + if vali: + if not task in ['low_sao2', 'high_heartrate', 'low_respiration']: + print('Skipping task', task, 'because validation evaluation.') + continue + print('Evaluating on task:', task) + #print('(', np.mean(synth_Y[:, 
label]), 'positive in train, ', np.mean(test_Y[:, label]), 'in test)') + #m = RandomForestClassifier(n_estimators=50).fit(synth_X, synth_Y[:, label]) + #m = SVC(gamma=0.001).fit(synth_X, synth_Y[:, label]) + synth_classifier = RandomForestClassifier(n_estimators=100).fit(synth_X, synth_Y[:, label]) + synth_predY = synth_classifier.predict(test_X) + synth_predY_prob = synth_classifier.predict_proba(test_X)[:, 1] + real_classifier = RandomForestClassifier(n_estimators=100).fit(train_X, train_Y[:, label]) + real_predY = real_classifier.predict(test_X) + real_predY_prob = real_classifier.predict_proba(test_X)[:, 1] + #print('(predicted', np.mean(predict), 'positive labels)') + + synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y[:, label], synth_predY, average='weighted') + synth_accuracy = accuracy_score(test_Y[:, label], synth_predY) + synth_auprc = average_precision_score(test_Y[:, label], synth_predY_prob) + synth_auroc = roc_auc_score(test_Y[:, label], synth_predY_prob) + synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc] + + real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y[:, label], real_predY, average='weighted') + real_accuracy = accuracy_score(test_Y[:, label], real_predY) + real_auprc = average_precision_score(test_Y[:, label], real_predY_prob) + real_auroc = roc_auc_score(test_Y[:, label], real_predY_prob) + real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc] + + all_scores = synth_scores + real_scores + + if vali: + report_file = open('./experiments/tstr/vali.' + which_setting + '_report.v3.csv', 'a') + report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n') + report_file.close() + else: + report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a') + report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n') + report_file.close() + + print(classification_report(test_Y[:, label], synth_predY)) + print(classification_report(test_Y[:, label], real_predY)) + if task in ['low_sao2', 'high_heartrate', 'low_respiration']: + score_list.append(synth_auprc + synth_auroc) + + if do_OR: + raise NotImplementedError + # do the OR task + extreme_heartrate_test = test_Y[:, 1] + test_Y[:, 4] + extreme_respiration_test = test_Y[:, 2] + test_Y[:, 5] + extreme_systemicmean_test = test_Y[:, 3] + test_Y[:, 6] + Y_OR_test = np.vstack([extreme_heartrate_test, extreme_respiration_test, extreme_systemicmean_test]).T + Y_OR_test = (Y_OR_test > 0)*1 + + extreme_heartrate_synth = synth_Y[:, 1] + synth_Y[:, 4] + extreme_respiration_synth = synth_Y[:, 2] + synth_Y[:, 5] + extreme_systemicmean_synth = synth_Y[:, 3] + synth_Y[:, 6] + Y_OR_synth = np.vstack([extreme_heartrate_synth, extreme_respiration_synth, extreme_systemicmean_synth]).T + Y_OR_synth = (Y_OR_synth > 0)*1 + + OR_names = ['extreme heartrate', 'extreme respiration', 'extreme MAP'] + OR_results = [] + for label in range(Y_OR_synth.shape[1]): + print('task:', OR_names[label]) + print('(', np.mean(Y_OR_synth[:, label]), 'positive in train, ', np.mean(Y_OR_test[:, label]), 'in test)') + m = RandomForestClassifier(n_estimators=500).fit(synth_X, Y_OR_synth[:, label]) + predict = m.predict(X_test) + print('(predicted', np.mean(predict), 'positive labels)') + accuracy = accuracy_score(Y_OR_test[:, label], predict) + 
precision = sklearn.metrics.precision_score(Y_OR_test[:, label], predict) + recall = sklearn.metrics.recall_score(Y_OR_test[:, label], predict) + print(accuracy, precision, recall) + OR_results.append([accuracy, precision, recall]) + else: + OR_results = [] + + score_across_tasks = np.mean(np.array(score_list)) + return score_across_tasks + +def NIPS_toy_plot(identifier_rbf, epoch_rbf, identifier_sine, epoch_sine, identifier_mnist, epoch_mnist): + """ + for each experiment: + - plot a bunch of train examples + - sample a bunch of generated examples + - plot all in separate PDFs so i can merge in illustrator + + for sine and rbf, grey background + MNIST is just MNIST (square though) + """ + n_samples = 15 + # settings + settings_rbf = json.load(open('./experiments/settings/' + identifier_rbf + '.txt', 'r')) + settings_sine = json.load(open('./experiments/settings/' + identifier_sine + '.txt', 'r')) + settings_mnist = json.load(open('./experiments/settings/' + identifier_mnist + '.txt', 'r')) + # data + data_rbf = np.load('./experiments/data/' + identifier_rbf + '.data.npy').item() + data_sine = np.load('./experiments/data/' + identifier_sine + '.data.npy').item() + data_mnist = np.load('./experiments/data/' + identifier_mnist + '.data.npy').item() + train_rbf = data_rbf['samples']['train'] + train_sine = data_sine['samples']['train'] + train_mnist = data_mnist['samples']['train'] + # sample + samples_rbf = model.sample_trained_model(settings_rbf, epoch_rbf, n_samples) + samples_sine = model.sample_trained_model(settings_sine, epoch_sine, n_samples) + samples_mnist = model.sample_trained_model(settings_mnist, epoch_mnist, n_samples) + # plot them all + index = 0 + #for sample in np.random.permutation(train_rbf)[:n_samples]: + # plotting.nips_plot_rbf(sample, index, 'train') + # index += 1 + #for sample in samples_rbf: + # plotting.nips_plot_rbf(sample, index, 'GAN') + # index += 1 + #for sample in np.random.permutation(train_sine)[:n_samples]: + # plotting.nips_plot_sine(sample, index, 'train') + # index += 1 + #for sample in samples_sine: + # plotting.nips_plot_sine(sample, index, 'GAN') + # index += 1 + for sample in np.random.permutation(train_mnist)[:n_samples]: + plotting.nips_plot_mnist(sample, index, 'train') + index += 1 + for sample in samples_mnist: + plotting.nips_plot_mnist(sample, index, 'GAN') + index += 1 + return True diff --git a/mmd.py b/mmd.py new file mode 100644 index 0000000..c2fa363 --- /dev/null +++ b/mmd.py @@ -0,0 +1,233 @@ +''' +MMD functions implemented in tensorflow. +(from https://github.com/dougalsutherland/opt-mmd/blob/master/gan/mmd.py) +''' +from __future__ import division + +import tensorflow as tf + +from tf_ops import dot, sq_sum + +from scipy.spatial.distance import pdist +from numpy import median, vstack, einsum +import pdb +import numpy as np + +_eps=1e-8 + +################################################################################ +### Quadratic-time MMD with Gaussian RBF kernel + +def _mix_rbf_kernel(X, Y, sigmas, wts=None): + """ + """ + if wts is None: + wts = [1.0] * sigmas.get_shape()[0] + + # debug! 
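+ # For 2-D inputs the Gram matrices are plain matrix products; for 3-D
+ # (batch x time x features) inputs, tensordot over the last two axes gives the
+ # Frobenius inner product between whole sequences, i.e. each sequence is
+ # treated as a flattened vector inside the RBF kernel.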
+ if len(X.shape) == 2: + # matrix + XX = tf.matmul(X, X, transpose_b=True) + XY = tf.matmul(X, Y, transpose_b=True) + YY = tf.matmul(Y, Y, transpose_b=True) + elif len(X.shape) == 3: + # tensor -- this is computing the Frobenius norm + XX = tf.tensordot(X, X, axes=[[1, 2], [1, 2]]) + XY = tf.tensordot(X, Y, axes=[[1, 2], [1, 2]]) + YY = tf.tensordot(Y, Y, axes=[[1, 2], [1, 2]]) + else: + raise ValueError(X) + + X_sqnorms = tf.diag_part(XX) + Y_sqnorms = tf.diag_part(YY) + + r = lambda x: tf.expand_dims(x, 0) + c = lambda x: tf.expand_dims(x, 1) + + K_XX, K_XY, K_YY = 0, 0, 0 + for sigma, wt in zip(tf.unstack(sigmas, axis=0), wts): + gamma = 1 / (2 * sigma**2) + K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms))) + K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms))) + K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms))) + + return K_XX, K_XY, K_YY, tf.reduce_sum(wts) + + +def rbf_mmd2(X, Y, sigma=1, biased=True): + return mix_rbf_mmd2(X, Y, sigmas=[sigma], biased=biased) + + +def mix_rbf_mmd2(X, Y, sigmas=(1,), wts=None, biased=True): + K_XX, K_XY, K_YY, d = _mix_rbf_kernel(X, Y, sigmas, wts) + return _mmd2(K_XX, K_XY, K_YY, const_diagonal=d, biased=biased) + + +def rbf_mmd2_and_ratio(X, Y, sigma=1, biased=True): + return mix_rbf_mmd2_and_ratio(X, Y, sigmas=[sigma], biased=biased) + + +def mix_rbf_mmd2_and_ratio(X, Y, sigmas=(1,), wts=None, biased=True): + K_XX, K_XY, K_YY, d = _mix_rbf_kernel(X, Y, sigmas, wts) + return _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=d, biased=biased) + + +################################################################################ +### Helper functions to compute variances based on kernel matrices + + +def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False): + m = tf.cast(K_XX.get_shape()[0], tf.float32) + n = tf.cast(K_YY.get_shape()[0], tf.float32) + + if biased: + mmd2 = (tf.reduce_sum(K_XX) / (m * m) + + tf.reduce_sum(K_YY) / (n * n) + - 2 * tf.reduce_sum(K_XY) / (m * n)) + else: + if const_diagonal is not False: + trace_X = m * const_diagonal + trace_Y = n * const_diagonal + else: + trace_X = tf.trace(K_XX) + trace_Y = tf.trace(K_YY) + + mmd2 = ((tf.reduce_sum(K_XX) - trace_X) / (m * (m - 1)) + + (tf.reduce_sum(K_YY) - trace_Y) / (n * (n - 1)) + - 2 * tf.reduce_sum(K_XY) / (m * n)) + + return mmd2 + + +def _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False, + min_var_est=_eps): + mmd2, var_est = _mmd2_and_variance( + K_XX, K_XY, K_YY, const_diagonal=const_diagonal, biased=biased) + ratio = mmd2 / tf.sqrt(tf.maximum(var_est, min_var_est)) + return mmd2, ratio + + +def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False): + m = tf.cast(K_XX.get_shape()[0], tf.float32) # Assumes X, Y are same shape + + ### Get the various sums of kernels that we'll use + # Kts drop the diagonal, but we don't need to compute them explicitly + if const_diagonal is not False: + const_diagonal = tf.cast(const_diagonal, tf.float32) + diag_X = diag_Y = const_diagonal + sum_diag_X = sum_diag_Y = m * const_diagonal + sum_diag2_X = sum_diag2_Y = m * const_diagonal**2 + else: + diag_X = tf.diag_part(K_XX) + diag_Y = tf.diag_part(K_YY) + + sum_diag_X = tf.reduce_sum(diag_X) + sum_diag_Y = tf.reduce_sum(diag_Y) + + sum_diag2_X = sq_sum(diag_X) + sum_diag2_Y = sq_sum(diag_Y) + + Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X + Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y + K_XY_sums_0 = tf.reduce_sum(K_XY, 0) + K_XY_sums_1 = tf.reduce_sum(K_XY, 1) + + Kt_XX_sum = 
tf.reduce_sum(Kt_XX_sums) + Kt_YY_sum = tf.reduce_sum(Kt_YY_sums) + K_XY_sum = tf.reduce_sum(K_XY_sums_0) + + Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X + Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y + K_XY_2_sum = sq_sum(K_XY) + + if biased: + mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m) + + (Kt_YY_sum + sum_diag_Y) / (m * m) + - 2 * K_XY_sum / (m * m)) + else: + mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m-1)) + + (Kt_YY_sum + sum_diag_Y) / (m * (m-1)) + - 2 * K_XY_sum / (m * m)) + + var_est = ( + 2 / (m**2 * (m-1)**2) * ( + 2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum + + 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum) + - (4*m-6) / (m**3 * (m-1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2) + + 4*(m-2) / (m**3 * (m-1)**2) * ( + sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0)) + - 4 * (m-3) / (m**3 * (m-1)**2) * K_XY_2_sum + - (8*m - 12) / (m**5 * (m-1)) * K_XY_sum**2 + + 8 / (m**3 * (m-1)) * ( + 1/m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum + - dot(Kt_XX_sums, K_XY_sums_1) + - dot(Kt_YY_sums, K_XY_sums_0)) + ) + + return mmd2, var_est + + +### additions from stephanie, for convenience + +def median_pairwise_distance(X, Y=None): + """ + Heuristic for bandwidth of the RBF. Median pairwise distance of joint data. + If Y is missing, just calculate it from X: + this is so that, during training, as Y changes, we can use a fixed + bandwidth (and save recalculating this each time we evaluated the mmd) + At the end of training, we do the heuristic "correctly" by including + both X and Y. + + Note: most of this code is assuming tensorflow, but X and Y are just ndarrays + """ + if Y is None: + Y = X # this is horrendously inefficient, sorry + + if len(X.shape) == 2: + # matrix + X_sqnorms = einsum('...i,...i', X, X) + Y_sqnorms = einsum('...i,...i', Y, Y) + XY = einsum('ia,ja', X, Y) + elif len(X.shape) == 3: + # tensor -- this is computing the Frobenius norm + X_sqnorms = einsum('...ij,...ij', X, X) + Y_sqnorms = einsum('...ij,...ij', Y, Y) + XY = einsum('iab,jab', X, Y) + else: + raise ValueError(X) + + distances = np.sqrt(X_sqnorms.reshape(-1, 1) - 2*XY + Y_sqnorms.reshape(1, -1)) + return median(distances) + + +def median_pairwise_distance_o(X, Y=None): + """ + Heuristic for bandwidth of the RBF. Median pairwise distance of joint data. + If Y is missing, just calculate it from X: + this is so that, during training, as Y changes, we can use a fixed + bandwidth (and save recalculating this each time we evaluated the mmd) + At the end of training, we do the heuristic "correctly" by including + both X and Y. + + Note: most of this code is assuming tensorflow, but X and Y are just ndarrays + """ + if Y is None: + Y = X # this is horrendously inefficient, sorry + + if len(X.shape) == 2: + # matrix + X_sqnorms = np.einsum('...i,...i', X, X) + Y_sqnorms = np.einsum('...i,...i', Y, Y) + XY = np.einsum('ia,ja', X, Y) + elif len(X.shape) == 3: + # tensor -- this is computing the Frobenius norm + X_sqnorms = np.einsum('...ij,...ij', X, X) # reduce the tensor shape + Y_sqnorms = np.einsum('...ij,...ij', Y, Y) + XY = np.einsum('iab,jab', X, Y) # X*Y^T?? + else: + raise ValueError(X) + + distances = np.sqrt(X_sqnorms.reshape(-1, 1) - 2 * XY + Y_sqnorms.reshape(1, -1)) + distances = distances.reshape(-1, 1) + distances = distances[~np.isnan(distances)] + return np.median(distances) \ No newline at end of file diff --git a/mod_core_rnn_cell_impl.py b/mod_core_rnn_cell_impl.py new file mode 100644 index 0000000..265538e --- /dev/null +++ b/mod_core_rnn_cell_impl.py @@ -0,0 +1,1063 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#modified by Stephanie (@corcra) to enable initializing the bias term in lstm """ +# ============================================================================== + +"""Module implementing RNN Cells.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import contextlib +import hashlib +import math +import numbers + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope as vs + +from tensorflow.python.ops.math_ops import sigmoid +from tensorflow.python.ops.math_ops import tanh +#from tensorflow.python.ops.rnn_cell_impl import _RNNCell as RNNCell +from tensorflow.python.ops.rnn_cell_impl import RNNCell + +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util import nest + + +_BIAS_VARIABLE_NAME = "biases" +_WEIGHTS_VARIABLE_NAME = "weights" + + +@contextlib.contextmanager +def _checked_scope(cell, scope, reuse=None, **kwargs): + if reuse is not None: + kwargs["reuse"] = reuse + with vs.variable_scope(scope, **kwargs) as checking_scope: + scope_name = checking_scope.name + if hasattr(cell, "_scope"): + cell_scope = cell._scope # pylint: disable=protected-access + if cell_scope.name != checking_scope.name: + raise ValueError( + "Attempt to reuse RNNCell %s with a different variable scope than " + "its first use. First use of cell was with scope '%s', this " + "attempt is with scope '%s'. Please create a new instance of the " + "cell if you would like it to use a different set of weights. " + "If before you were using: MultiRNNCell([%s(...)] * num_layers), " + "change to: MultiRNNCell([%s(...) for _ in range(num_layers)]). " + "If before you were using the same cell instance as both the " + "forward and reverse cell of a bidirectional RNN, simply create " + "two instances (one for forward, one for reverse). 
" + "In May 2017, we will start transitioning this cell's behavior " + "to use existing stored weights, if any, when it is called " + "with scope=None (which can lead to silent model degradation, so " + "this error will remain until then.)" + % (cell, cell_scope.name, scope_name, type(cell).__name__, + type(cell).__name__)) + else: + weights_found = False + try: + with vs.variable_scope(checking_scope, reuse=True): + vs.get_variable(_WEIGHTS_VARIABLE_NAME) + weights_found = True + except ValueError: + pass + if weights_found and reuse is None: + raise ValueError( + "Attempt to have a second RNNCell use the weights of a variable " + "scope that already has weights: '%s'; and the cell was not " + "constructed as %s(..., reuse=True). " + "To share the weights of an RNNCell, simply " + "reuse it in your second calculation, or create a new one with " + "the argument reuse=True." % (scope_name, type(cell).__name__)) + + # Everything is OK. Update the cell's scope and yield it. + cell._scope = checking_scope # pylint: disable=protected-access + yield checking_scope + + +class BasicRNNCell(RNNCell): + """The most basic RNN cell.""" + + def __init__(self, num_units, input_size=None, activation=tanh, reuse=None): + if input_size is not None: + logging.warn("%s: The input_size parameter is deprecated.", self) + self._num_units = num_units + self._activation = activation + self._reuse = reuse + + @property + def state_size(self): + return self._num_units + + @property + def output_size(self): + return self._num_units + + def __call__(self, inputs, state, scope=None): + """Most basic RNN: output = new_state = act(W * input + U * state + B).""" + with _checked_scope(self, scope or "basic_rnn_cell", reuse=self._reuse): + output = self._activation( + _linear([inputs, state], self._num_units, True)) + return output, output + + +class GRUCell(RNNCell): + """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).""" + + def __init__(self, num_units, input_size=None, activation=tanh, reuse=None): + if input_size is not None: + logging.warn("%s: The input_size parameter is deprecated.", self) + self._num_units = num_units + self._activation = activation + self._reuse = reuse + + @property + def state_size(self): + return self._num_units + + @property + def output_size(self): + return self._num_units + + def __call__(self, inputs, state, scope=None): + """Gated recurrent unit (GRU) with nunits cells.""" + with _checked_scope(self, scope or "gru_cell", reuse=self._reuse): + with vs.variable_scope("gates"): # Reset gate and update gate. + # We start with bias of 1.0 to not reset and not update. + value = sigmoid(_linear( + [inputs, state], 2 * self._num_units, True, 1.0)) + r, u = array_ops.split( + value=value, + num_or_size_splits=2, + axis=1) + with vs.variable_scope("candidate"): + c = self._activation(_linear([inputs, r * state], + self._num_units, True)) + new_h = u * state + (1 - u) * c + return new_h, new_h + + +_LSTMStateTuple = collections.namedtuple("LSTMStateTuple", ("c", "h")) + + +class LSTMStateTuple(_LSTMStateTuple): + """Tuple used by LSTM Cells for `state_size`, `zero_state`, and output state. + + Stores two elements: `(c, h)`, in that order. + + Only used when `state_is_tuple=True`. + """ + __slots__ = () + + @property + def dtype(self): + (c, h) = self + if not c.dtype == h.dtype: + raise TypeError("Inconsistent internal state: %s vs %s" % + (str(c.dtype), str(h.dtype))) + return c.dtype + + +class BasicLSTMCell(RNNCell): + """Basic LSTM recurrent network cell. 
+ + The implementation is based on: http://arxiv.org/abs/1409.2329. + + We add forget_bias (default: 1) to the biases of the forget gate in order to + reduce the scale of forgetting in the beginning of the training. + + It does not allow cell clipping, a projection layer, and does not + use peep-hole connections: it is the basic baseline. + + For advanced models, please use the full LSTMCell that follows. + """ + + def __init__(self, num_units, forget_bias=1.0, input_size=None, + state_is_tuple=True, activation=tanh, reuse=None): + """Initialize the basic LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell. + forget_bias: float, The bias added to forget gates (see above). + input_size: Deprecated and unused. + state_is_tuple: If True, accepted and returned states are 2-tuples of + the `c_state` and `m_state`. If False, they are concatenated + along the column axis. The latter behavior will soon be deprecated. + activation: Activation function of the inner states. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + """ + if not state_is_tuple: + logging.warn("%s: Using a concatenated state is slower and will soon be " + "deprecated. Use state_is_tuple=True.", self) + if input_size is not None: + logging.warn("%s: The input_size parameter is deprecated.", self) + self._num_units = num_units + self._forget_bias = forget_bias + self._state_is_tuple = state_is_tuple + self._activation = activation + self._reuse = reuse + + @property + def state_size(self): + return (LSTMStateTuple(self._num_units, self._num_units) + if self._state_is_tuple else 2 * self._num_units) + + @property + def output_size(self): + return self._num_units + + def __call__(self, inputs, state, scope=None): + """Long short-term memory cell (LSTM).""" + with _checked_scope(self, scope or "basic_lstm_cell", reuse=self._reuse): + # Parameters of gates are concatenated into one multiply for efficiency. + if self._state_is_tuple: + c, h = state + else: + c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1) + concat = _linear([inputs, h], 4 * self._num_units, True) + + # i = input_gate, j = new_input, f = forget_gate, o = output_gate + i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1) + + new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) * + self._activation(j)) + new_h = self._activation(new_c) * sigmoid(o) + + if self._state_is_tuple: + new_state = LSTMStateTuple(new_c, new_h) + else: + new_state = array_ops.concat([new_c, new_h], 1) + return new_h, new_state + + +class LSTMCell(RNNCell): + """Long short-term memory unit (LSTM) recurrent network cell. + + The default non-peephole implementation is based on: + + http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf + + S. Hochreiter and J. Schmidhuber. + "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997. + + The peephole implementation is based on: + + https://research.google.com/pubs/archive/43905.pdf + + Hasim Sak, Andrew Senior, and Francoise Beaufays. + "Long short-term memory recurrent neural network architectures for + large scale acoustic modeling." INTERSPEECH, 2014. + + The class uses optional peep-hole connections, optional cell clipping, and + an optional projection layer. 
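+
+ This version is additionally modified to accept a `bias_start` value, which is
+ forwarded to the internal _linear call so that the concatenated gate biases can
+ be initialised to a chosen value rather than zero.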
+ """ + + def __init__(self, num_units, input_size=None, + use_peepholes=False, cell_clip=None, + initializer=None, bias_start=0.0, num_proj=None, proj_clip=None, + num_unit_shards=None, num_proj_shards=None, + forget_bias=1.0, state_is_tuple=True, + activation=tanh, reuse=None): + """Initialize the parameters for an LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell + input_size: Deprecated and unused. + use_peepholes: bool, set True to enable diagonal/peephole connections. + cell_clip: (optional) A float value, if provided the cell state is clipped + by this value prior to the cell output activation. + initializer: (optional) The initializer to use for the weight and + projection matrices. + bias_start: (optional) The VALUE to initialize the bias to, in + the linear call + num_proj: (optional) int, The output dimensionality for the projection + matrices. If None, no projection is performed. + proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is + provided, then the projected values are clipped elementwise to within + `[-proj_clip, proj_clip]`. + num_unit_shards: Deprecated, will be removed by Jan. 2017. + Use a variable_scope partitioner instead. + num_proj_shards: Deprecated, will be removed by Jan. 2017. + Use a variable_scope partitioner instead. + forget_bias: Biases of the forget gate are initialized by default to 1 + in order to reduce the scale of forgetting at the beginning of + the training. + state_is_tuple: If True, accepted and returned states are 2-tuples of + the `c_state` and `m_state`. If False, they are concatenated + along the column axis. This latter behavior will soon be deprecated. + activation: Activation function of the inner states. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + """ + if not state_is_tuple: + logging.warn("%s: Using a concatenated state is slower and will soon be " + "deprecated. Use state_is_tuple=True.", self) + if input_size is not None: + logging.warn("%s: The input_size parameter is deprecated.", self) + if num_unit_shards is not None or num_proj_shards is not None: + logging.warn( + "%s: The num_unit_shards and proj_unit_shards parameters are " + "deprecated and will be removed in Jan 2017. " + "Use a variable scope with a partitioner instead.", self) + + self._num_units = num_units + self._use_peepholes = use_peepholes + self._cell_clip = cell_clip + self._initializer = initializer + self._bias_start = bias_start + self._num_proj = num_proj + self._proj_clip = proj_clip + self._num_unit_shards = num_unit_shards + self._num_proj_shards = num_proj_shards + self._forget_bias = forget_bias + self._state_is_tuple = state_is_tuple + self._activation = activation + self._reuse = reuse + + if num_proj: + self._state_size = ( + LSTMStateTuple(num_units, num_proj) + if state_is_tuple else num_units + num_proj) + self._output_size = num_proj + else: + self._state_size = ( + LSTMStateTuple(num_units, num_units) + if state_is_tuple else 2 * num_units) + self._output_size = num_units + + @property + def state_size(self): + return self._state_size + + @property + def output_size(self): + return self._output_size + + def __call__(self, inputs, state, scope=None): + """Run one step of LSTM. + + Args: + inputs: input Tensor, 2D, batch x num_units. + state: if `state_is_tuple` is False, this must be a state Tensor, + `2-D, batch x state_size`. 
If `state_is_tuple` is True, this must be a + tuple of state Tensors, both `2-D`, with column sizes `c_state` and + `m_state`. + scope: VariableScope for the created subgraph; defaults to "lstm_cell". + + Returns: + A tuple containing: + + - A `2-D, [batch x output_dim]`, Tensor representing the output of the + LSTM after reading `inputs` when previous state was `state`. + Here output_dim is: + num_proj if num_proj was set, + num_units otherwise. + - Tensor(s) representing the new state of LSTM after reading `inputs` when + the previous state was `state`. Same type and shape(s) as `state`. + + Raises: + ValueError: If input size cannot be inferred from inputs via + static shape inference. + """ + num_proj = self._num_units if self._num_proj is None else self._num_proj + + if self._state_is_tuple: + (c_prev, m_prev) = state + else: + c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units]) + m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj]) + + dtype = inputs.dtype + input_size = inputs.get_shape().with_rank(2)[1] + if input_size.value is None: + raise ValueError("Could not infer input size from inputs.get_shape()[-1]") + with _checked_scope(self, scope or "lstm_cell", + initializer=self._initializer, + reuse=self._reuse) as unit_scope: + if self._num_unit_shards is not None: + unit_scope.set_partitioner( + partitioned_variables.fixed_size_partitioner( + self._num_unit_shards)) + # i = input_gate, j = new_input, f = forget_gate, o = output_gate + lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True, bias_start=self._bias_start) + i, j, f, o = array_ops.split( + value=lstm_matrix, num_or_size_splits=4, axis=1) + # Diagonal connections + if self._use_peepholes: + with vs.variable_scope(unit_scope) as projection_scope: + if self._num_unit_shards is not None: + projection_scope.set_partitioner(None) + w_f_diag = vs.get_variable( + "w_f_diag", shape=[self._num_units], dtype=dtype) + w_i_diag = vs.get_variable( + "w_i_diag", shape=[self._num_units], dtype=dtype) + w_o_diag = vs.get_variable( + "w_o_diag", shape=[self._num_units], dtype=dtype) + + if self._use_peepholes: + c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) + else: + c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * + self._activation(j)) + + if self._cell_clip is not None: + # pylint: disable=invalid-unary-operand-type + c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) + # pylint: enable=invalid-unary-operand-type + if self._use_peepholes: + m = sigmoid(o + w_o_diag * c) * self._activation(c) + else: + m = sigmoid(o) * self._activation(c) + + if self._num_proj is not None: + with vs.variable_scope("projection") as proj_scope: + if self._num_proj_shards is not None: + proj_scope.set_partitioner( + partitioned_variables.fixed_size_partitioner( + self._num_proj_shards)) + m = _linear(m, self._num_proj, bias=False) + + if self._proj_clip is not None: + # pylint: disable=invalid-unary-operand-type + m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip) + # pylint: enable=invalid-unary-operand-type + + new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else + array_ops.concat([c, m], 1)) + return m, new_state + + +class OutputProjectionWrapper(RNNCell): + """Operator adding an output projection to the given cell. 
+ + Note: in many cases it may be more efficient to not use this wrapper, + but instead concatenate the whole sequence of your outputs in time, + do the projection on this batch-concatenated sequence, then split it + if needed or directly feed into a softmax. + """ + + def __init__(self, cell, output_size, reuse=None): + """Create a cell with output projection. + + Args: + cell: an RNNCell, a projection to output_size is added to it. + output_size: integer, the size of the output after projection. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + + Raises: + TypeError: if cell is not an RNNCell. + ValueError: if output_size is not positive. + """ + if not isinstance(cell, RNNCell): + raise TypeError("The parameter cell is not RNNCell.") + if output_size < 1: + raise ValueError("Parameter output_size must be > 0: %d." % output_size) + self._cell = cell + self._output_size = output_size + self._reuse = reuse + + @property + def state_size(self): + return self._cell.state_size + + @property + def output_size(self): + return self._output_size + + def zero_state(self, batch_size, dtype): + with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + return self._cell.zero_state(batch_size, dtype) + + def __call__(self, inputs, state, scope=None): + """Run the cell and output projection on inputs, starting from state.""" + output, res_state = self._cell(inputs, state) + # Default scope: "OutputProjectionWrapper" + with _checked_scope(self, scope or "output_projection_wrapper", + reuse=self._reuse): + projected = _linear(output, self._output_size, True) + return projected, res_state + + +class InputProjectionWrapper(RNNCell): + """Operator adding an input projection to the given cell. + + Note: in many cases it may be more efficient to not use this wrapper, + but instead concatenate the whole sequence of your inputs in time, + do the projection on this batch-concatenated sequence, then split it. + """ + + def __init__(self, cell, num_proj, input_size=None): + """Create a cell with input projection. + + Args: + cell: an RNNCell, a projection of inputs is added before it. + num_proj: Python integer. The dimension to project to. + input_size: Deprecated and unused. + + Raises: + TypeError: if cell is not an RNNCell. 
+ """ + if input_size is not None: + logging.warn("%s: The input_size parameter is deprecated.", self) + if not isinstance(cell, RNNCell): + raise TypeError("The parameter cell is not RNNCell.") + self._cell = cell + self._num_proj = num_proj + + @property + def state_size(self): + return self._cell.state_size + + @property + def output_size(self): + return self._cell.output_size + + def zero_state(self, batch_size, dtype): + with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + return self._cell.zero_state(batch_size, dtype) + + def __call__(self, inputs, state, scope=None): + """Run the input projection and then the cell.""" + # Default scope: "InputProjectionWrapper" + with vs.variable_scope(scope or "input_projection_wrapper"): + projected = _linear(inputs, self._num_proj, True) + return self._cell(projected, state) + + +def _enumerated_map_structure(map_fn, *args, **kwargs): + ix = [0] + def enumerated_fn(*inner_args, **inner_kwargs): + r = map_fn(ix[0], *inner_args, **inner_kwargs) + ix[0] += 1 + return r + return nest.map_structure(enumerated_fn, *args, **kwargs) + + +class DropoutWrapper(RNNCell): + """Operator adding dropout to inputs and outputs of the given cell.""" + + def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0, + state_keep_prob=1.0, variational_recurrent=False, + input_size=None, dtype=None, seed=None): + """Create a cell with added input, state, and/or output dropout. + + If `variational_recurrent` is set to `True` (**NOT** the default behavior), + then the the same dropout mask is applied at every step, as described in: + + Y. Gal, Z Ghahramani. "A Theoretically Grounded Application of Dropout in + Recurrent Neural Networks". https://arxiv.org/abs/1512.05287 + + Otherwise a different dropout mask is applied at every time step. + + Args: + cell: an RNNCell, a projection to output_size is added to it. + input_keep_prob: unit Tensor or float between 0 and 1, input keep + probability; if it is constant and 1, no input dropout will be added. + output_keep_prob: unit Tensor or float between 0 and 1, output keep + probability; if it is constant and 1, no output dropout will be added. + state_keep_prob: unit Tensor or float between 0 and 1, output keep + probability; if it is constant and 1, no output dropout will be added. + State dropout is performed on the *output* states of the cell. + variational_recurrent: Python bool. If `True`, then the same + dropout pattern is applied across all time steps per run call. + If this parameter is set, `input_size` **must** be provided. + input_size: (optional) (possibly nested tuple of) `TensorShape` objects + containing the depth(s) of the input tensors expected to be passed in to + the `DropoutWrapper`. Required and used **iff** + `variational_recurrent = True` and `input_keep_prob < 1`. + dtype: (optional) The `dtype` of the input, state, and output tensors. + Required and used **iff** `variational_recurrent = True`. + seed: (optional) integer, the randomness seed. + + Raises: + TypeError: if cell is not an RNNCell. + ValueError: if any of the keep_probs are not between 0 and 1. 
+ """ + if not isinstance(cell, RNNCell): + raise TypeError("The parameter cell is not a RNNCell.") + with ops.name_scope("DropoutWrapperInit"): + def tensor_and_const_value(v): + tensor_value = ops.convert_to_tensor(v) + const_value = tensor_util.constant_value(tensor_value) + return (tensor_value, const_value) + for prob, attr in [(input_keep_prob, "input_keep_prob"), + (state_keep_prob, "state_keep_prob"), + (output_keep_prob, "output_keep_prob")]: + tensor_prob, const_prob = tensor_and_const_value(prob) + if const_prob is not None: + if const_prob < 0 or const_prob > 1: + raise ValueError("Parameter %s must be between 0 and 1: %d" + % (attr, const_prob)) + setattr(self, "_%s" % attr, float(const_prob)) + else: + setattr(self, "_%s" % attr, tensor_prob) + + # Set cell, variational_recurrent, seed before running the code below + self._cell = cell + self._variational_recurrent = variational_recurrent + self._seed = seed + + self._recurrent_input_noise = None + self._recurrent_state_noise = None + self._recurrent_output_noise = None + + if variational_recurrent: + if dtype is None: + raise ValueError( + "When variational_recurrent=True, dtype must be provided") + + def convert_to_batch_shape(s): + # Prepend a 1 for the batch dimension; for recurrent + # variational dropout we use the same dropout mask for all + # batch elements. + return array_ops.concat( + ([1], tensor_shape.TensorShape(s).as_list()), 0) + + def batch_noise(s, inner_seed): + shape = convert_to_batch_shape(s) + return random_ops.random_uniform(shape, seed=inner_seed, dtype=dtype) + + if (not isinstance(self._input_keep_prob, numbers.Real) or + self._input_keep_prob < 1.0): + if input_size is None: + raise ValueError( + "When variational_recurrent=True and input_keep_prob < 1.0 or " + "is unknown, input_size must be provided") + self._recurrent_input_noise = _enumerated_map_structure( + lambda i, s: batch_noise(s, inner_seed=self._gen_seed("input", i)), + input_size) + self._recurrent_state_noise = _enumerated_map_structure( + lambda i, s: batch_noise(s, inner_seed=self._gen_seed("state", i)), + cell.state_size) + self._recurrent_output_noise = _enumerated_map_structure( + lambda i, s: batch_noise(s, inner_seed=self._gen_seed("output", i)), + cell.output_size) + + def _gen_seed(self, salt_prefix, index): + if self._seed is None: + return None + salt = "%s_%d" % (salt_prefix, index) + string = (str(self._seed) + salt).encode("utf-8") + return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF + + @property + def state_size(self): + return self._cell.state_size + + @property + def output_size(self): + return self._cell.output_size + + def zero_state(self, batch_size, dtype): + with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + return self._cell.zero_state(batch_size, dtype) + + def _variational_recurrent_dropout_value( + self, index, value, noise, keep_prob): + """Performs dropout given the pre-calculated noise tensor.""" + # uniform [keep_prob, 1.0 + keep_prob) + random_tensor = keep_prob + noise + + # 0. if [keep_prob, 1.0) and 1. 
if [1.0, 1.0 + keep_prob) + binary_tensor = math_ops.floor(random_tensor) + ret = math_ops.div(value, keep_prob) * binary_tensor + ret.set_shape(value.get_shape()) + return ret + + def _dropout(self, values, salt_prefix, recurrent_noise, keep_prob): + """Decides whether to perform standard dropout or recurrent dropout.""" + if not self._variational_recurrent: + def dropout(i, v): + return nn_ops.dropout( + v, keep_prob=keep_prob, seed=self._gen_seed(salt_prefix, i)) + return _enumerated_map_structure(dropout, values) + else: + def dropout(i, v, n): + return self._variational_recurrent_dropout_value(i, v, n, keep_prob) + return _enumerated_map_structure(dropout, values, recurrent_noise) + + def __call__(self, inputs, state, scope=None): + """Run the cell with the declared dropouts.""" + def _should_dropout(p): + return (not isinstance(p, float)) or p < 1 + + if _should_dropout(self._input_keep_prob): + inputs = self._dropout(inputs, "input", + self._recurrent_input_noise, + self._input_keep_prob) + output, new_state = self._cell(inputs, state, scope) + if _should_dropout(self._state_keep_prob): + new_state = self._dropout(new_state, "state", + self._recurrent_state_noise, + self._state_keep_prob) + if _should_dropout(self._output_keep_prob): + output = self._dropout(output, "output", + self._recurrent_output_noise, + self._output_keep_prob) + return output, new_state + + +class ResidualWrapper(RNNCell): + """RNNCell wrapper that ensures cell inputs are added to the outputs.""" + + def __init__(self, cell): + """Constructs a `ResidualWrapper` for `cell`. + + Args: + cell: An instance of `RNNCell`. + """ + self._cell = cell + + @property + def state_size(self): + return self._cell.state_size + + @property + def output_size(self): + return self._cell.output_size + + def zero_state(self, batch_size, dtype): + with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + return self._cell.zero_state(batch_size, dtype) + + def __call__(self, inputs, state, scope=None): + """Run the cell and add its inputs to its outputs. + + Args: + inputs: cell inputs. + state: cell state. + scope: optional cell scope. + + Returns: + Tuple of cell outputs and new state. + + Raises: + TypeError: If cell inputs and outputs have different structure (type). + ValueError: If cell inputs and outputs have different structure (value). + """ + outputs, new_state = self._cell(inputs, state, scope=scope) + nest.assert_same_structure(inputs, outputs) + # Ensure shapes match + def assert_shape_match(inp, out): + inp.get_shape().assert_is_compatible_with(out.get_shape()) + nest.map_structure(assert_shape_match, inputs, outputs) + res_outputs = nest.map_structure( + lambda inp, out: inp + out, inputs, outputs) + return (res_outputs, new_state) + + +class DeviceWrapper(RNNCell): + """Operator that ensures an RNNCell runs on a particular device.""" + + def __init__(self, cell, device): + """Construct a `DeviceWrapper` for `cell` with device `device`. + + Ensures the wrapped `cell` is called with `tf.device(device)`. + + Args: + cell: An instance of `RNNCell`. + device: A device string or function, for passing to `tf.device`. 
+ """ + self._cell = cell + self._device = device + + @property + def state_size(self): + return self._cell.state_size + + @property + def output_size(self): + return self._cell.output_size + + def zero_state(self, batch_size, dtype): + with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + return self._cell.zero_state(batch_size, dtype) + + def __call__(self, inputs, state, scope=None): + """Run the cell on specified device.""" + with ops.device(self._device): + return self._cell(inputs, state, scope=scope) + + +class EmbeddingWrapper(RNNCell): + """Operator adding input embedding to the given cell. + + Note: in many cases it may be more efficient to not use this wrapper, + but instead concatenate the whole sequence of your inputs in time, + do the embedding on this batch-concatenated sequence, then split it and + feed into your RNN. + """ + + def __init__(self, cell, embedding_classes, embedding_size, initializer=None, + reuse=None): + """Create a cell with an added input embedding. + + Args: + cell: an RNNCell, an embedding will be put before its inputs. + embedding_classes: integer, how many symbols will be embedded. + embedding_size: integer, the size of the vectors we embed into. + initializer: an initializer to use when creating the embedding; + if None, the initializer from variable scope or a default one is used. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + + Raises: + TypeError: if cell is not an RNNCell. + ValueError: if embedding_classes is not positive. + """ + if not isinstance(cell, RNNCell): + raise TypeError("The parameter cell is not RNNCell.") + if embedding_classes <= 0 or embedding_size <= 0: + raise ValueError("Both embedding_classes and embedding_size must be > 0: " + "%d, %d." % (embedding_classes, embedding_size)) + self._cell = cell + self._embedding_classes = embedding_classes + self._embedding_size = embedding_size + self._initializer = initializer + self._reuse = reuse + + @property + def state_size(self): + return self._cell.state_size + + @property + def output_size(self): + return self._cell.output_size + + def zero_state(self, batch_size, dtype): + with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + return self._cell.zero_state(batch_size, dtype) + + def __call__(self, inputs, state, scope=None): + """Run the cell on embedded inputs.""" + with _checked_scope(self, scope or "embedding_wrapper", reuse=self._reuse): + with ops.device("/cpu:0"): + if self._initializer: + initializer = self._initializer + elif vs.get_variable_scope().initializer: + initializer = vs.get_variable_scope().initializer + else: + # Default initializer for embeddings should have variance=1. + sqrt3 = math.sqrt(3) # Uniform(-sqrt(3), sqrt(3)) has variance=1. 
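+          # Var(Uniform(-a, a)) = (2a)**2 / 12 = a**2 / 3, so a = sqrt(3) gives unit variance.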
+ initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3) + + if type(state) is tuple: + data_type = state[0].dtype + else: + data_type = state.dtype + + embedding = vs.get_variable( + "embedding", [self._embedding_classes, self._embedding_size], + initializer=initializer, + dtype=data_type) + embedded = embedding_ops.embedding_lookup( + embedding, array_ops.reshape(inputs, [-1])) + return self._cell(embedded, state) + + +class MultiRNNCell(RNNCell): + """RNN cell composed sequentially of multiple simple cells.""" + + def __init__(self, cells, state_is_tuple=True): + """Create a RNN cell composed sequentially of a number of RNNCells. + + Args: + cells: list of RNNCells that will be composed in this order. + state_is_tuple: If True, accepted and returned states are n-tuples, where + `n = len(cells)`. If False, the states are all + concatenated along the column axis. This latter behavior will soon be + deprecated. + + Raises: + ValueError: if cells is empty (not allowed), or at least one of the cells + returns a state tuple but the flag `state_is_tuple` is `False`. + """ + if not cells: + raise ValueError("Must specify at least one cell for MultiRNNCell.") + if not nest.is_sequence(cells): + raise TypeError( + "cells must be a list or tuple, but saw: %s." % cells) + + self._cells = cells + self._state_is_tuple = state_is_tuple + if not state_is_tuple: + if any(nest.is_sequence(c.state_size) for c in self._cells): + raise ValueError("Some cells return tuples of states, but the flag " + "state_is_tuple is not set. State sizes are: %s" + % str([c.state_size for c in self._cells])) + + @property + def state_size(self): + if self._state_is_tuple: + return tuple(cell.state_size for cell in self._cells) + else: + return sum([cell.state_size for cell in self._cells]) + + @property + def output_size(self): + return self._cells[-1].output_size + + def zero_state(self, batch_size, dtype): + with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + if self._state_is_tuple: + return tuple(cell.zero_state(batch_size, dtype) for cell in self._cells) + else: + # We know here that state_size of each cell is not a tuple and + # presumably does not contain TensorArrays or anything else fancy + return super(MultiRNNCell, self).zero_state(batch_size, dtype) + + def __call__(self, inputs, state, scope=None): + """Run this multi-layer cell on inputs, starting from state.""" + with vs.variable_scope(scope or "multi_rnn_cell"): + cur_state_pos = 0 + cur_inp = inputs + new_states = [] + for i, cell in enumerate(self._cells): + with vs.variable_scope("cell_%d" % i): + if self._state_is_tuple: + if not nest.is_sequence(state): + raise ValueError( + "Expected state to be a tuple of length %d, but received: %s" + % (len(self.state_size), state)) + cur_state = state[i] + else: + cur_state = array_ops.slice( + state, [0, cur_state_pos], [-1, cell.state_size]) + cur_state_pos += cell.state_size + cur_inp, new_state = cell(cur_inp, cur_state) + new_states.append(new_state) + new_states = (tuple(new_states) if self._state_is_tuple else + array_ops.concat(new_states, 1)) + return cur_inp, new_states + + +class _SlimRNNCell(RNNCell): + """A simple wrapper for slim.rnn_cells.""" + + def __init__(self, cell_fn): + """Create a SlimRNNCell from a cell_fn. + + Args: + cell_fn: a function which takes (inputs, state, scope) and produces the + outputs and the new_state. Additionally when called with inputs=None and + state=None it should return (initial_outputs, initial_state). 
+ + Raises: + TypeError: if cell_fn is not callable + ValueError: if cell_fn cannot produce a valid initial state. + """ + if not callable(cell_fn): + raise TypeError("cell_fn %s needs to be callable", cell_fn) + self._cell_fn = cell_fn + self._cell_name = cell_fn.func.__name__ + init_output, init_state = self._cell_fn(None, None) + output_shape = init_output.get_shape() + state_shape = init_state.get_shape() + self._output_size = output_shape.with_rank(2)[1].value + self._state_size = state_shape.with_rank(2)[1].value + if self._output_size is None: + raise ValueError("Initial output created by %s has invalid shape %s" % + (self._cell_name, output_shape)) + if self._state_size is None: + raise ValueError("Initial state created by %s has invalid shape %s" % + (self._cell_name, state_shape)) + + @property + def state_size(self): + return self._state_size + + @property + def output_size(self): + return self._output_size + + def __call__(self, inputs, state, scope=None): + scope = scope or self._cell_name + output, state = self._cell_fn(inputs, state, scope=scope) + return output, state + + +def _linear(args, output_size, bias, bias_start=0.0, scope=None): + """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. + + Args: + args: a 2D Tensor or a list of 2D, batch x n, Tensors. + output_size: int, second dimension of W[i]. + bias: boolean, whether to add a bias term or not. + bias_start: starting value to initialize the bias; 0 by default. + + Returns: + A 2D Tensor with shape [batch x output_size] equal to + sum_i(args[i] * W[i]), where W[i]s are newly created matrices. + + Raises: + ValueError: if some of the arguments has unspecified or wrong shape. + """ + if args is None or (nest.is_sequence(args) and not args): + raise ValueError("`args` must be specified") + if not nest.is_sequence(args): + args = [args] + + # Calculate the total size of arguments on dimension 1. + total_arg_size = 0 + shapes = [a.get_shape() for a in args] + for shape in shapes: + if shape.ndims != 2: + raise ValueError("linear is expecting 2D arguments: %s" % shapes) + if shape[1].value is None: + raise ValueError("linear expects shape[1] to be provided for shape %s, " + "but saw %s" % (shape, shape[1])) + else: + total_arg_size += shape[1].value + + dtype = [a.dtype for a in args][0] + + # Now the computation. 
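+  # A single weight matrix of shape [total_arg_size, output_size] is multiplied
+  # into `args` (their axis-1 concatenation when more than one tensor is given).
+  # When `bias` is True, the bias vector is initialized to the constant
+  # `bias_start`, which is how LSTMCell's `bias_start` argument lets a saved
+  # LSTM bias be restored.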
+ scope = vs.get_variable_scope() + with vs.variable_scope(scope) as outer_scope: + weights = vs.get_variable( + _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size], dtype=dtype) + if len(args) == 1: + res = math_ops.matmul(args[0], weights) + else: + res = math_ops.matmul(array_ops.concat(args, 1), weights) + if not bias: + return res + with vs.variable_scope(outer_scope) as inner_scope: + inner_scope.set_partitioner(None) + biases = vs.get_variable( + _BIAS_VARIABLE_NAME, [output_size], + dtype=dtype, + initializer=init_ops.constant_initializer(bias_start, dtype=dtype)) + return nn_ops.bias_add(res, biases) diff --git a/model.py b/model.py new file mode 100644 index 0000000..5c5c069 --- /dev/null +++ b/model.py @@ -0,0 +1,289 @@ +import tensorflow as tf +import numpy as np +# from data_utils import get_batch +import data_utils +import pdb +import json +import sys +from mod_core_rnn_cell_impl import LSTMCell # modified to allow initializing bias in lstm + +# from tensorflow.contrib.rnn import LSTMCell +tf.logging.set_verbosity(tf.logging.ERROR) +import mmd + +from differential_privacy.dp_sgd.dp_optimizer import dp_optimizer +from differential_privacy.dp_sgd.dp_optimizer import sanitizer +from differential_privacy.privacy_accountant.tf import accountant + +# ------------------------------- # +""" +Most of the models are copied from https://github.com/ratschlab/RGAN +""" + +# --- to do with latent space --- # + +def sample_Z(batch_size, seq_length, latent_dim, use_time=False, use_noisy_time=False): + sample = np.float32(np.random.normal(size=[batch_size, seq_length, latent_dim])) + if use_time: + print('WARNING: use_time has different semantics') + sample[:, :, 0] = np.linspace(0, 1.0 / seq_length, num=seq_length) + return sample + +# --- samples for testing ---# + +def sample_T(batch_size, batch_idx): + samples_aaa = np.load('./data/samples_aa.npy') + num_samples_t = samples_aaa.shape[0] + labels_aaa = np.load('./data/labels_aa.npy') + idx_aaa = np.load('./data/idx_aa.npy') + start_pos = batch_idx * batch_size + end_pos = start_pos + batch_size + T_mb = samples_aaa[start_pos:end_pos, :, :] + L_mb = labels_aaa[start_pos:end_pos, :, :] + I_mb = idx_aaa[start_pos:end_pos, :, :] + return T_mb, L_mb, I_mb, num_samples_t + +def sample_TT(batch_size): + samples_aaa = np.load('./data/samples_aa.npy') + labels_aaa = np.load('./data/labels_aa.npy') + idx_aaa = np.load('./data/idx_aa.npy') + T_indices = np.random.choice(len(samples_aaa), size=batch_size, replace=False) + T_mb = samples_aaa[T_indices, :, :] + L_mb = labels_aaa[T_indices, :, :] + I_mb = idx_aaa[T_indices, :, :] + return T_mb, L_mb, I_mb + +# --- to do with training --- # +def train_epoch(epoch, samples, labels, sess, Z, X, D_loss, G_loss, D_solver, G_solver, + batch_size, use_time, D_rounds, G_rounds, seq_length, + latent_dim, num_signals): + """ + Train generator and discriminator for one epoch. 
+ """ + # for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + (cond_dim > 0) * G_rounds), D_rounds + (cond_dim > 0) * G_rounds): + for batch_idx in range(0, int(len(samples) / batch_size) - (D_rounds + G_rounds), D_rounds + G_rounds): + # update the discriminator + X_mb, Y_mb = data_utils.get_batch(samples, batch_size, batch_idx, labels) + Z_mb = sample_Z(batch_size, seq_length, latent_dim, use_time) + for d in range(D_rounds): + # run the discriminator solver + _ = sess.run(D_solver, feed_dict={X: X_mb, Z: Z_mb}) + + # update the generator + for g in range(G_rounds): + # run the generator solver + _ = sess.run(G_solver, feed_dict={Z: sample_Z(batch_size, seq_length, latent_dim, use_time=use_time)}) + + # at the end, get the loss + D_loss_curr, G_loss_curr = sess.run([D_loss, G_loss], feed_dict={X: X_mb, + Z: sample_Z(batch_size, seq_length, latent_dim, + use_time=use_time)}) + D_loss_curr = np.mean(D_loss_curr) + G_loss_curr = np.mean(G_loss_curr) + + + return D_loss_curr, G_loss_curr + + +def GAN_loss(Z, X, generator_settings, discriminator_settings): + + # normal GAN + G_sample = generator(Z, **generator_settings) + + D_real, D_logit_real = discriminator(X, **discriminator_settings) + + D_fake, D_logit_fake = discriminator(G_sample, reuse=True, **discriminator_settings) + + # Measures the probability error in discrete classification tasks in which each class is independent + # and not mutually exclusive. + # logits: predicted labels?? + + D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real)), 1) + D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)), 1) + + D_loss = D_loss_real + D_loss_fake + + + # G_loss = tf.reduce_mean(tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)), axis=1)) + G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)), 1) + + + return D_loss, G_loss + + +def GAN_solvers(D_loss, G_loss, learning_rate, batch_size, total_examples, l2norm_bound, batches_per_lot, sigma, dp=False): + """ + Optimizers + """ + discriminator_vars = [v for v in tf.trainable_variables() if v.name.startswith('discriminator')] + generator_vars = [v for v in tf.trainable_variables() if v.name.startswith('generator')] + if dp: + print('Using differentially private SGD to train discriminator!') + eps = tf.placeholder(tf.float32) + delta = tf.placeholder(tf.float32) + priv_accountant = accountant.GaussianMomentsAccountant(total_examples) + clip = True + l2norm_bound = l2norm_bound / batch_size + batches_per_lot = 1 + gaussian_sanitizer = sanitizer.AmortizedGaussianSanitizer( + priv_accountant, + [l2norm_bound, clip]) + + # the trick is that we need to calculate the gradient with respect to + # each example in the batch, during the DP SGD step + D_solver = dp_optimizer.DPGradientDescentOptimizer(learning_rate, + [eps, delta], + sanitizer=gaussian_sanitizer, + sigma=sigma, + batches_per_lot=batches_per_lot).minimize(D_loss, var_list=discriminator_vars) + else: + D_loss_mean_over_batch = tf.reduce_mean(D_loss) + D_solver = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(D_loss_mean_over_batch, var_list=discriminator_vars) + priv_accountant = None + G_loss_mean_over_batch = tf.reduce_mean(G_loss) + G_solver = tf.train.AdamOptimizer().minimize(G_loss_mean_over_batch, var_list=generator_vars) + 
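+    # Note: the discriminator is trained with (optionally differentially private)
+    # gradient descent, while the generator always uses Adam with default settings.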
return D_solver, G_solver, priv_accountant + + +# --- to do with the model --- # + +def create_placeholders(batch_size, seq_length, latent_dim, num_signals): + + Z = tf.placeholder(tf.float32, [batch_size, seq_length, latent_dim]) + X = tf.placeholder(tf.float32, [batch_size, seq_length, num_signals]) + T = tf.placeholder(tf.float32, [batch_size, seq_length, num_signals]) + return Z, X, T + +def generator(z, hidden_units_g, seq_length, batch_size, num_signals, reuse=False, parameters=None, learn_scale=True): + + """ + If parameters are supplied, initialise as such + """ + with tf.variable_scope("generator") as scope: + if reuse: + scope.reuse_variables() + if parameters is None: + W_out_G_initializer = tf.truncated_normal_initializer() + b_out_G_initializer = tf.truncated_normal_initializer() + scale_out_G_initializer = tf.constant_initializer(value=1.0) + lstm_initializer = None + bias_start = 1.0 + else: + W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0']) + b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0']) + try: + scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0']) + except KeyError: + scale_out_G_initializer = tf.constant_initializer(value=1) + assert learn_scale + lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0']) + bias_start = parameters['generator/rnn/lstm_cell/biases:0'] + + W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_signals], + initializer=W_out_G_initializer) + b_out_G = tf.get_variable(name='b_out_G', shape=num_signals, initializer=b_out_G_initializer) + scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, + trainable=learn_scale) + # inputs + inputs = z + + cell = LSTMCell(num_units=hidden_units_g, + state_is_tuple=True, + initializer=lstm_initializer, + bias_start=bias_start, + reuse=reuse) + rnn_outputs, rnn_states = tf.nn.dynamic_rnn( + cell=cell, + dtype=tf.float32, + sequence_length=[seq_length] * batch_size, + inputs=inputs) + rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g]) + logits_2d = tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G #out put weighted sum + # output_2d = tf.multiply(tf.nn.tanh(logits_2d), scale_out_G) + output_2d = tf.nn.tanh(logits_2d) # logits operation [-1, 1] + output_3d = tf.reshape(output_2d, [-1, seq_length, num_signals]) + + return output_3d + + +def discriminator(x, hidden_units_d, seq_length, batch_size, reuse=False, parameters=None, batch_mean=False): + with tf.variable_scope("discriminator") as scope: + if reuse: + scope.reuse_variables() + if parameters is None: + W_out_D = tf.get_variable(name='W_out_D', shape=[hidden_units_d, 1], + initializer=tf.truncated_normal_initializer()) + b_out_D = tf.get_variable(name='b_out_D', shape=1, + initializer=tf.truncated_normal_initializer()) + + else: + W_out_D = tf.constant_initializer(value=parameters['discriminator/W_out_D:0']) + b_out_D = tf.constant_initializer(value=parameters['discriminator/b_out_D:0']) + + # inputs + inputs = x + + # add the average of the inputs to the inputs (mode collapse? 
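+        # i.e. tile the per-feature batch mean and concatenate it along the signal
+        # axis, so the discriminator sees minibatch statistics (a common heuristic
+        # against mode collapse).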
+ if batch_mean: + mean_over_batch = tf.stack([tf.reduce_mean(x, axis=0)] * batch_size, axis=0) + inputs = tf.concat([x, mean_over_batch], axis=2) + + cell = tf.contrib.rnn.LSTMCell(num_units=hidden_units_d, + state_is_tuple=True, + reuse=reuse) + rnn_outputs, rnn_states = tf.nn.dynamic_rnn( + cell=cell, + dtype=tf.float32, + inputs=inputs) + # logit_final = tf.matmul(rnn_outputs[:, -1], W_final_D) + b_final_D + logits = tf.einsum('ijk,km', rnn_outputs, W_out_D) + b_out_D # output weighted sum + # real logits or actual output layer? + # logit is a function that maps probabilities ([0,1]) to ([-inf,inf]) ? + + output = tf.nn.sigmoid(logits) # y = 1 / (1 + exp(-x)). output activation [0, 1]. Probability?? + # sigmoid output ([0,1]), Probability? + + return output, logits + + +# --- display ----# +def display_batch_progression(j, id_max): + ''' + See epoch progression + ''' + batch_progression = int((j / id_max) * 100) + sys.stdout.write(str(batch_progression) + ' % epoch' + chr(13)) + _ = sys.stdout.flush + + +# --- to do with saving/loading --- # + +def dump_parameters(identifier, sess): + """ + Save model parmaters to a numpy file + """ + # dump_path = './experiments/parameters/' + identifier + '.npy' + dump_path = './experiments/parameters/' + identifier + '.npy' + model_parameters = dict() + for v in tf.trainable_variables(): + model_parameters[v.name] = sess.run(v) + np.save(dump_path, model_parameters) + print('Recorded', len(model_parameters), 'parameters to', dump_path) + return True + + +def load_parameters(identifier): + """ + Load parameters from a numpy file + """ + # load_path = './experiments/plots/parameters/' + identifier + '.npy' + # load_path = './experiments/plots/parameters/parameters_60/' + identifier + '.npy' + # load_path = './experiments/parameters/' + identifier + '.npy' + # load_path = './experiments/parameters/' + identifier + '.npy' + model_parameters = np.load(identifier).item() + return model_parameters + + + + + diff --git a/plotting.py b/plotting.py new file mode 100644 index 0000000..1ad0cdc --- /dev/null +++ b/plotting.py @@ -0,0 +1,606 @@ +import numpy as np +import matplotlib as mpl +mpl.use('Agg') +import matplotlib.pyplot as plt +import pdb +from time import time +from matplotlib.colors import hsv_to_rgb +from pandas import read_table, read_hdf + +def plot_label(label, id): + + fig, ax = plt.subplots(1, 1) + ax.plot(label) + fig.savefig("./Figs/Label_" + str(id).zfill(2) + ".png") + plt.clf() + plt.close(fig) + + return + +def visualise_at_epoch(vis_sample, data, predict_labels, epoch, + identifier, num_epochs, resample_rate_in_min, multivariate_mnist, + seq_length, labels): + # TODO: what's with all these arguments + if data == 'mnist': + if predict_labels: + n_labels = 1 + if one_hot: + n_labels = 6 + lab_votes = np.argmax(vis_sample[:, :, -n_labels:], axis=2) + else: + lab_votes = vis_sample[:, :, -n_labels:] + labs, _ = mode(lab_votes, axis=1) + samps = vis_sample[:, :, :-n_labels] + else: + labs = labels + samps = vis_sample + if multivariate_mnist: + save_mnist_plot_sample(samps.reshape(-1, seq_length**2, 1), epoch, identifier, n_samples=6, labels=labs) + else: + save_mnist_plot_sample(samps, epoch, identifier, n_samples=6, labels=labs) + else: + save_plot_sample(vis_sample, epoch, identifier, n_samples=6, + num_epochs=num_epochs) + + return True + + +def save_plot_sample(samples, idx, identifier, n_samples=16, num_epochs=None, ncol=4): + assert n_samples <= samples.shape[0] + assert n_samples % ncol == 0 + sample_length = samples.shape[1] + + if 
not num_epochs is None: + col = hsv_to_rgb((1, 1.0*(idx)/num_epochs, 0.8)) #convert hsv values in a numpy array to rgb values all values assumed to be in range [0, 1]. + else: + col = 'grey' + + x_points = np.arange(sample_length) #Return evenly spaced values within a given interval. + + nrow = int(n_samples/ncol) + fig, axarr = plt.subplots(nrow, ncol, sharex=True, figsize=(6, 6)) + for m in range(nrow): + for n in range(ncol): + # first column + sample = samples[n*nrow + m, :, :] + axarr[m, n].plot(x_points, sample, color=col) + # axarr[m, n].set_ylim(-1, 1) + for n in range(ncol): + axarr[-1, n].xaxis.set_ticks(range(0, sample_length, int(sample_length/4))) + fig.suptitle(idx) + fig.subplots_adjust(hspace = 0.15) + fig.savefig("./experiments/plots/gs/" + identifier + "_epoch" + str(idx).zfill(4) + ".png") + plt.clf() + plt.close() + return + +def save_plot_interpolate(input_samples, samples, idx, identifier, num_epochs=None, distances=None, sigma=1): + """ very boilerplate, unsure how to make nicer """ + n_samples = samples.shape[0] + sample_length = samples.shape[1] + + if not num_epochs is None: + col = hsv_to_rgb((1, 1.0*(idx)/num_epochs, 0.8)) + else: + col = 'grey' + + x_points = np.arange(sample_length) + if distances is None: + nrow = n_samples + else: + nrow = n_samples + 1 + ncol = 1 + fig, axarr = plt.subplots(nrow, ncol, figsize=(3, 9)) + if distances is None: + startat = 0 + else: + startat = 1 + axarr[0].plot(distances.dA, color='green', label='distance from A', linestyle='--', marker='o', markersize=4) + axarr[0].plot(distances.dB, color='orange', label='distance from B', linestyle='dotted', marker='o', markersize=4) + axarr[0].get_xaxis().set_visible(False) + axarr[0].set_title('distance from endpoints') + for m in range(startat, nrow): + sample = samples[m-startat, :, 0] + axarr[m].plot(x_points, sample, color=col) + for m in range(startat, nrow): + axarr[m].set_ylim(-1.1, 1.1) + axarr[m].set_xlim(0, sample_length) + axarr[m].spines["top"].set_visible(False) + axarr[m].spines["bottom"].set_visible(False) + axarr[m].spines["right"].set_visible(False) + axarr[m].spines["left"].set_visible(False) + axarr[m].tick_params(bottom='off', left='off') + axarr[m].get_xaxis().set_visible(False) + axarr[m].get_yaxis().set_visible(False) + axarr[m].set_facecolor((0.96, 0.96, 0.96)) + if not input_samples is None: + # now do the real samples + axarr[startat].plot(x_points, input_samples[0], color='green', linestyle='--') + axarr[-1].plot(x_points, input_samples[1], color='green', linestyle='--') + + axarr[-1].xaxis.set_ticks(range(0, sample_length, int(sample_length/4))) + fig.suptitle(idx) + fig.subplots_adjust(hspace = 0.2) + fig.savefig("./experiments/plots/" + identifier + "_interpolate.png") + fig.savefig("./experiments/plots/" + identifier + "_interpolate.pdf") + plt.clf() + plt.close() + return + +def reconstruction_errors(identifier, train_errors, vali_errors, + generated_errors, random_errors): + """ + Plot two histogram of the reconstruction errors. 
+ """ + print(identifier) + fig, axarr = plt.subplots(4, 1, sharex=True, figsize=(4, 8)) + axarr[0].hist(train_errors, normed=1, color='green', bins=50) + axarr[0].set_title("train reconstruction errors") + axarr[1].hist(vali_errors, normed=1, color='blue', bins=50) + axarr[1].set_title('vali reconstruction errors') + axarr[2].hist(generated_errors, normed=1, color='pink', bins=50) + axarr[2].set_title('generated reconstruction errors') + axarr[3].hist(random_errors, normed=1, color='grey', bins=50) + axarr[3].set_title('random reconstruction errors') + for ax in axarr: + ax.spines["top"].set_visible(False) + ax.spines["bottom"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.spines["left"].set_visible(False) + ax.tick_params(bottom='off', left='off') + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + axarr[3].set_xlim(0, 0.05) + plt.tight_layout() + plt.savefig('./experiments/plots/' + identifier + '_reconstruction_errors.png') + return True + +def save_plot_reconstruct(real_samples, model_samples, identifier): + assert real_samples.shape == model_samples.shape + sample_length = real_samples.shape[1] + x_points = np.arange(sample_length) + nrow = real_samples.shape[0] + ncol = 2 + fig, axarr = plt.subplots(nrow, ncol, sharex=True, figsize=(6, 6)) + for m in range(nrow): + real_sample = real_samples[m, :, 0] + model_sample = model_samples[m, :, 0] + axarr[m, 0].plot(x_points, real_sample, color='green') + axarr[m, 1].plot(x_points, model_sample, color='red') + axarr[-1, 0].xaxis.set_ticks(range(0, sample_length, int(sample_length/4))) + axarr[-1, 1].xaxis.set_ticks(range(0, sample_length, int(sample_length/4))) + axarr[0, 0].set_title('real') + axarr[0, 1].set_title('reconstructed') + fig.subplots_adjust(hspace = 0.15) + fig.savefig("./experiments/plots/" + identifier + "_reconstruct.png") + plt.clf() + plt.close() + return + +def save_plot_vary_dimension(samples_list, idx, identifier, n_dim): + """ + """ + assert len(samples_list) == n_dim + sample_length = samples_list[0].shape[1] + + x_points = np.arange(sample_length) + + nrow = samples_list[0].shape[0] + sidelength = n_dim*1.5 + fig, axarr = plt.subplots(nrow, n_dim, sharex=True, sharey=True, figsize=(sidelength, sidelength)) + for dim in range(n_dim): + sample_dim = samples_list[dim] + axarr[0, dim].set_title(dim) + h = dim*1.0/n_dim # hue + for n in range(nrow): + sample = sample_dim[n, :, 0] + axarr[n, dim].plot(x_points, sample, color='black') + axarr[n, dim].spines["top"].set_visible(False) + axarr[n, dim].spines["bottom"].set_visible(False) + axarr[n, dim].spines["right"].set_visible(False) + axarr[n, dim].spines["left"].set_visible(False) + axarr[n, dim].tick_params(bottom='off', left='off') + axarr[n, dim].get_xaxis().set_visible(False) + axarr[n, dim].set_facecolor(hsv_to_rgb((h, 0 + 0.25*n/nrow, 0.96))) + axarr[-1, dim].xaxis.set_ticks(range(0, sample_length, int(sample_length/4))) + fig.suptitle(idx) + fig.subplots_adjust(hspace = 0.11, wspace=0.11) + fig.savefig("./experiments/plots/" + identifier + "_epoch" + str(idx).zfill(4) + ".png") + plt.clf() + plt.close() + return True + +def interpolate(sampleA, sampleB=None, n_steps=6): + """ + Plot the linear interpolation between two latent space points. 
+ """ + weights = np.linspace(0, 1, n_steps) + if sampleB is None: + # do it "close by" + sampleB = sampleA + np.random.normal(size=sampleA.shape, scale=0.05) + samples = np.array([w*sampleB + (1-w)*sampleA for w in weights]) + return samples + +def vary_latent_dimension(sample, dimension, n_steps=6): + """ + """ + assert dimension <= sample.shape[1] + scale = np.mean(np.abs(sample[:, dimension])) + deviations = np.linspace(0, 2*scale, n_steps) + samples = np.array([sample[:, :]]*n_steps) + for n in range(n_steps): + samples[n, :, dimension] += deviations[n] + return samples + +def plot_sine_evaluation(real_samples, fake_samples, idx, identifier): + """ + Create histogram of fake (generated) samples frequency, amplitude distribution. + Also for real samples. + """ + ### frequency + seq_length = len(real_samples[0]) # assumes samples are all the same length + frate = seq_length + freqs_hz = np.fft.rfftfreq(seq_length)*frate # this is for labelling the plot + # TODO, just taking axis 0 for now... + w_real = np.mean(np.abs(np.fft.rfft(real_samples[:, :, 0])), axis=0) + w_fake = np.mean(np.abs(np.fft.rfft(fake_samples[:, :, 0])), axis=0) + ### amplitude + A_real = np.max(np.abs(real_samples[:, :, 0]), axis=1) + A_fake = np.max(np.abs(fake_samples[:, :, 0]), axis=1) + ### now plot + nrow = 2 + ncol = 2 + fig, axarr = plt.subplots(nrow, ncol, sharex='col', figsize=(6, 6)) + # freq + axarr[0, 0].vlines(freqs_hz, ymin=np.minimum(np.zeros_like(w_real), w_real), ymax=np.maximum(np.zeros_like(w_real), w_real), color='#30ba50') + axarr[0, 0].set_title("frequency", fontsize=16) + axarr[0, 0].set_ylabel("real", fontsize=16) + axarr[1, 0].vlines(freqs_hz, ymin=np.minimum(np.zeros_like(w_fake), w_fake), ymax=np.maximum(np.zeros_like(w_fake), w_fake), color='#ba4730') + axarr[1, 0].set_ylabel("generated", fontsize=16) + # amplitude + axarr[0, 1].hist(A_real, normed=True, color='#30ba50', bins=30) + axarr[0, 1].set_title("amplitude", fontsize=16) + axarr[1, 1].hist(A_fake, normed=True, color='#ba4730', bins=30) + + fig.savefig('./experiments/plots/' + identifier + '_eval' + str(idx).zfill(4) +'.png') + plt.clf() + plt.close() + return True + +def plot_trace(identifier, xmax=250, final=False, dp=False): + """ + """ + + trace_path = './experiments/traces/' + identifier + '.trace.txt' + da = read_table(trace_path, sep=' ') + nrow = 3 + if dp: + trace_dp_path = './experiments/traces/' + identifier + '.dptrace.txt' + da_dp = read_table(trace_dp_path, sep=' ') + nrow += 1 + + ncol=1 + fig, axarr = plt.subplots(nrow, ncol, sharex='col', figsize=(6, 6)) + + # D_loss + d_handle, = axarr[0].plot(da.epoch, da.D_loss, color='red', label='discriminator') + axarr[0].set_ylabel('D loss') +# axarr[0].set_ylim(0.9, 1.6) + if final: + #D_ticks = [1.0, 1.2, 1.5] + D_ticks = [0.5, 1.0, 1.5] + axarr[0].get_yaxis().set_ticks(D_ticks) + for tick in D_ticks: + axarr[0].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0) + # G loss + ax_G = axarr[0].twinx() + g_handle, = ax_G.plot(da.epoch, da.G_loss, color='green', ls='dashed', label='generator') + ax_G.set_ylabel('G loss') + if final: + G_ticks = [2.5, 5] + ax_G.get_yaxis().set_ticks(G_ticks) +# for tick in G_ticks: +# axarr[0].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='green', alpha=1.0, zorder=0) + + ax_G.spines["top"].set_visible(False) + ax_G.spines["bottom"].set_visible(False) + ax_G.spines["right"].set_visible(False) + ax_G.spines["left"].set_visible(False) + ax_G.tick_params(bottom='off', right='off') + 
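+    # single legend on the top panel, combining the line handles from both y-axes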
axarr[0].legend(handles=[d_handle, g_handle], labels=['discriminator', 'generator']) + + # mmd + da_mmd = da.loc[:, ['epoch', 'mmd2']].dropna() + axarr[1].plot(da_mmd.epoch, da_mmd.mmd2, color='purple') + axarr[1].set_ylabel('MMD$^2$') + #axarr[1].set_ylim(0.0, 0.04) + + #ax_that = axarr[1].twinx() + #ax_that.plot(da.that) + #ax_that.set_ylabel('$\hat{t}$') + #ax_that.set_ylim(0, 50) + if final: + mmd_ticks = [0.01, 0.02, 0.03] + axarr[1].get_yaxis().set_ticks(mmd_ticks) + for tick in mmd_ticks: + axarr[1].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0) + + # log likelihood + da_ll = da.loc[:, ['epoch', 'll', 'real_ll']].dropna() + axarr[2].plot(da_ll.epoch, da_ll.ll, color='orange') + axarr[2].plot(da_ll.epoch, da_ll.real_ll, color='orange', alpha=0.5) + axarr[2].set_ylabel('likelihood') + axarr[2].set_xlabel('epoch') + axarr[2].set_ylim(-750, 100) + #axarr[2].set_ylim(-10000000, 500) + if final: +# ll_ticks = [-1.0*1e7, -0.5*1e7, 0] + ll_ticks = [-500 ,-250, 0] + axarr[2].get_yaxis().set_ticks(ll_ticks) + for tick in ll_ticks: + axarr[2].plot((-10, xmax+10), (tick, tick), ls='dotted', lw=0.5, color='black', alpha=0.4, zorder=0) + + if dp: + assert da_dp.columns[0] == 'epoch' + epochs = da_dp['epoch'] + eps_values = da_dp.columns[1:] + for eps_string in eps_values: + if 'eps' in eps_string: + eps = eps_string[3:] + else: + eps = eps_string + deltas = da_dp[eps_string] + axarr[3].plot(epochs, deltas, label=eps) + axarr[3].set_ylabel('delta') + axarr[3].set_xlabel('epoch') + axarr[3].legend() + + # beautify + for ax in axarr: + #ax.spines["top"].set_visible(True) + ax.spines["top"].set_color((0, 0, 0, 0.3)) + #ax.spines["bottom"].set_visible(False) + ax.spines["bottom"].set_color((0, 0, 0, 0.3)) + #ax.spines["right"].set_visible(False) + ax.spines["right"].set_color((0, 0, 0, 0.3)) + #ax.spines["left"].set_visible(False) + ax.spines["left"].set_color((0, 0, 0, 0.3)) + ax.tick_params(bottom='off', left='off') + # make background grey + # ax.set_facecolor((0.96, 0.96, 0.96)) + ymin, ymax = ax.get_ylim() + for x in np.arange(0, xmax+10, 10): + ax.plot((x, x), (ymin, ymax), ls='dotted', lw=0.5, color='black', alpha=0.40, zorder=0) + ax.set_xlim(-5, xmax) + ax.get_yaxis().set_label_coords(-0.11,0.5) + + # bottom one + + fig.savefig('./experiments/traces/' + identifier + '_trace.png') + fig.savefig('./experiments/traces/' + identifier + '_trace.pdf') + plt.clf() + plt.close() + return True + + +def save_samples(vis_sample, identifier, epoch): + + np.save('./experiments/plots/gs/' + identifier + '_gs_%s.npy' % epoch, vis_sample) + + return True + +def save_samples_real(vis_real, identifier): + + np.save('./experiments/plots/gs/' + identifier + '_gs_real.npy', vis_real) + + return True + +def save_mnist_plot_sample(samples, idx, identifier, n_samples, labels=None): + """ + Generates a grid showing mnist digits. 
+ + """ + assert n_samples <= samples.shape[0] + if not labels is None: + assert n_samples <= len(labels) + if len(labels.shape) > 1 and not labels.shape[1] == 1: + # one-hot + label_titles = np.argmax(labels, axis=1) + else: + label_titles = labels + else: + label_titles = ['NA']*n_samples + assert n_samples % 2 == 0 + img_size = int(np.sqrt(samples.shape[1])) + + nrow = int(n_samples/2) + ncol = 2 + fig, axarr = plt.subplots(nrow, ncol, sharex=True, figsize=(8, 8)) + for m in range(nrow): + # first column + sample = samples[m, :, 0] + axarr[m, 0].imshow(sample.reshape([img_size,img_size]), cmap='gray') + axarr[m, 0].set_title(str(label_titles[m])) + # second column + sample = samples[nrow + m, :, 0] + axarr[m, 1].imshow(sample.reshape([img_size,img_size]), cmap='gray') + axarr[m, 1].set_title(str(label_titles[m + nrow])) + fig.suptitle(idx) + fig.suptitle(idx) + fig.subplots_adjust(hspace = 0.15) + fig.savefig("./experiments/plots/" + identifier + "_epoch" + str(idx).zfill(4) + ".png") + plt.clf() + plt.close() + return + +def visualise_latent(Z, identifier): + """ + visualise a SINGLE point in the latent space + """ + seq_length = Z.shape[0] + latent_dim = Z.shape[1] + if latent_dim > 2: + print('WARNING: Only visualising first two dimensions of latent space.') + h = np.random.random() + colours = np.array([hsv_to_rgb((h, i/seq_length, 0.96)) for i in range(seq_length)]) +# plt.plot(Z[:, 0], Z[:, 1], c='grey', alpha=0.5) + for i in range(seq_length): + plt.scatter(Z[i, 0], Z[i, 1], marker='o', c=colours[i]) + plt.savefig('./experiments/plots/' + identifier + '_Z.png') + plt.clf() + plt.close() + return True + + +# --- to do with the model --- # +def plot_parameters(parameters, identifier): + """ + visualise the parameters of a GAN + """ + generator_out = parameters['generator/W_out_G:0'] + generator_weights = parameters['generator/rnn/lstm_cell/weights:0'] # split this into four + generator_matrices = np.split(generator_weights, 4, 1) + fig, axarr = plt.subplots(5, 1, sharex=True, + gridspec_kw = {'height_ratios':[0.2, 1, 1, 1, 1]}, figsize=(3,13)) + + axarr[0].matshow(generator_out.T, extent=[0,100,0,100]) + axarr[0].set_title('W_out_G') + axarr[1].matshow(generator_matrices[0]) + axarr[1].set_title('LSTM weights (1)') + axarr[2].matshow(generator_matrices[1]) + axarr[2].set_title('LSTM weights (2)') + axarr[3].matshow(generator_matrices[2]) + axarr[3].set_title('LSTM weights (3)') + axarr[4].matshow(generator_matrices[3]) + axarr[4].set_title('LSTM weights (4)') + for a in axarr: + a.set_xlim(0, 100) + a.set_ylim(0, 100) + a.spines["top"].set_visible(False) + a.spines["bottom"].set_visible(False) + a.spines["right"].set_visible(False) + a.spines["left"].set_visible(False) + a.get_xaxis().set_visible(False) + a.get_yaxis().set_visible(False) +# a.tick_params(bottom='off', left='off', top='off') + plt.tight_layout() + plt.savefig('./experiments/plots/' + identifier + '_weights.png') + return True + +### TSTR ### +def view_mnist_eval(identifier, train_X, train_Y, synth_X, synth_Y, test_X, test_Y, synth_predY, real_predY): + """ + Basically just + http://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html + """ + # resize everything + side_length = int(np.sqrt(train_X.shape[1])) + train_X = train_X.reshape(-1, side_length, side_length) + synth_X = synth_X.reshape(-1, side_length, side_length) + test_X = test_X.reshape(-1, side_length, side_length) + # remember, they're wrecked in the outer function thanks to python + synth_images_and_labels = 
list(zip(synth_X, synth_Y)) + for index, (image, label) in enumerate(synth_images_and_labels[:4]): + plt.subplot(4, 4, index + 1) + plt.axis('off') + plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') + if index == 0: + plt.title('synth train: %i' % label) + else: + plt.title('%i' % label) + train_images_and_labels = list(zip(train_X, train_Y)) + for index, (image, label) in enumerate(train_images_and_labels[:4]): + plt.subplot(4, 4, index + 5) + plt.axis('off') + plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') + if index == 0: + plt.title('real train: %i' % label) + else: + plt.title('%i' % label) + images_and_synthpreds = list(zip(test_X, synth_predY)) + for index, (image, prediction) in enumerate(images_and_synthpreds[:4]): + plt.subplot(4, 4, index + 9) + plt.axis('off') + plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') + if index == 0: + plt.title('synth pred: %i' % prediction) + else: + plt.title('%i' % prediction) + images_and_realpreds = list(zip(test_X, real_predY)) + for index, (image, prediction) in enumerate(images_and_realpreds[:4]): + plt.subplot(4, 4, index + 13) + plt.axis('off') + plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') + if index == 0: + plt.title('real pred: %i' % prediction) + else: + plt.title('%i' % prediction) + plt.tight_layout() + plt.title(identifier) + plt.savefig('./experiments/tstr/' + identifier + '_preds.png') + return True + +# --- nips !!! --- # +def nips_plot_rbf(sample, index, which='train'): + if which == 'train': +# col = '#167ea0' + col = '#13af5f' + else: + col = 'black' + sample_length = len(sample) + sample = sample.reshape(sample_length) + x_points = np.arange(sample_length) + fig, axarr = plt.subplots(1, 1, figsize=(2, 2)) + axarr.set_facecolor((0.95, 0.96, 0.96)) + axarr.plot(x_points, sample, color=col) + axarr.set_ylim(-1.5, 1.5) + axarr.get_xaxis().set_visible(False) + axarr.get_yaxis().set_visible(False) + axarr.spines["top"].set_visible(False) + axarr.spines["bottom"].set_visible(False) + axarr.spines["right"].set_visible(False) + axarr.spines["left"].set_visible(False) + axarr.tick_params(bottom='off', left='off') + plt.savefig('./plots/NIPS_rbf_' + which + '_' + str(index) + '.png') + plt.savefig('./plots/NIPS_rbf_' + which + '_' + str(index) + '.pdf') + plt.clf() + plt.close() + return True + +def nips_plot_sine(sample, index, which='train'): + if which == 'train': + #col = '#167ea0' + #col = '#13af5f' + col = '#1188ad' + else: + col = 'black' + sample_length = len(sample) + sample = sample.reshape(sample_length) + sample_length = len(sample) + sample = sample.reshape(sample_length) + x_points = np.arange(sample_length) + fig, axarr = plt.subplots(1, 1, figsize=(2, 2)) + axarr.set_facecolor((0.95, 0.96, 0.96)) + axarr.plot(x_points, sample, color=col) + axarr.set_ylim(-1.1, 1.1) + axarr.get_xaxis().set_visible(False) + axarr.get_yaxis().set_visible(False) + axarr.spines["top"].set_visible(False) + axarr.spines["bottom"].set_visible(False) + axarr.spines["right"].set_visible(False) + axarr.spines["left"].set_visible(False) + axarr.tick_params(bottom='off', left='off') + plt.savefig('./plots/NIPS_sine_' + which + '_' + str(index) + '.png') + plt.savefig('./plots/NIPS_sine_' + which + '_' + str(index) + '.pdf') + plt.clf() + plt.close() + return True + +def nips_plot_mnist(sample, index, which='train'): + plt.axis('off') + plt.imshow(sample, cmap=plt.cm.gray, interpolation='nearest') + plt.savefig('./plots/NIPS_mnist_' + which + '_' + str(index) + '.png') + 
plt.savefig('./plots/NIPS_mnist_' + which + '_' + str(index) + '.pdf') + plt.clf() + plt.close() + return True
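+
+
+# --- usage sketch (illustrative only) --- #
+# Minimal, hedged example of the input shapes the helpers above expect; the
+# random data, the 'demo' identifier and the output location are assumptions
+# made for this sketch, not part of the original pipeline.
+if __name__ == "__main__":
+    import os
+    os.makedirs('./experiments/plots/gs/', exist_ok=True)
+    rng = np.random.RandomState(0)
+    # 16 fake series, length 30, one signal: [n_samples, seq_length, num_signals]
+    fake_samples = rng.normal(size=(16, 30, 1)).astype(np.float32)
+    # writes ./experiments/plots/gs/demo_epoch0000.png
+    save_plot_sample(fake_samples, idx=0, identifier='demo', n_samples=16, num_epochs=10)
+    # linear interpolation from one latent point towards a nearby perturbation
+    Z_A = rng.normal(size=(30, 5))  # [seq_length, latent_dim]
+    path = interpolate(Z_A, n_steps=6)
+    print(path.shape)  # (6, 30, 5)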