# pc_viz.py
import os, itertools, imageio
import numpy as np
import matplotlib.pyplot as plt
# For some reason this doesn't work with sklearn?
# from tensorflow.examples.tutorials.mnist import input_data
from sklearn.datasets import fetch_openml  # fetch_mldata was removed from scikit-learn
from sklearn.decomposition import PCA
from tflearn.datasets import cifar10
from tflearn.data_utils import shuffle, to_categorical
n_pcs = 300

def pca_reconstruct(pca, dat0, n_components):
    # Beyond the number of fitted components there is nothing left to truncate,
    # so just return the original data
    if n_components > n_pcs:
        return dat0
    # Grab the principal components (forward pass through the "encoder")
    X_train_pca = pca.transform(dat0)
    # Keep only the first n_components principal components
    X_train_pca[:, n_components:] = 0
    # Project back to the original space (pass the components through the "decoder")
    X_projected = pca.inverse_transform(X_train_pca)
    # Rescale to [0, 1] for display
    X_projected = (X_projected - np.min(X_projected)) / (np.max(X_projected) - np.min(X_projected))
    return X_projected
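
# A minimal sketch (hypothetical helper, defined here but never called by the
# script) of what the zero-then-inverse_transform trick above computes: with
# scikit-learn's default whiten=False, keeping the first k scores and decoding
# is the same as projecting the centred data onto the span of the top-k
# components (before the display rescaling).
def pca_reconstruct_explicit(pca, dat0, n_components):
    V_k = pca.components_[:n_components]      # (k, n_features) top-k directions
    centred = dat0 - pca.mean_                # PCA centres the data before projecting
    return centred @ V_k.T @ V_k + pca.mean_  # encode to k scores, decode, undo centring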

def show_pca(num_pcs, path='pca.png', original=False):
    size_figure_grid = 5
    fig, ax = plt.subplots(size_figure_grid, size_figure_grid, figsize=(5, 5))
    if not original:
        pca_samples = pca_reconstruct(pca, fixed_sample_batch, num_pcs)
    else:
        pca_samples = fixed_sample_batch
    # Rescale to [0, 1] so imshow renders MNIST and CIFAR consistently
    pca_samples = (pca_samples - np.min(pca_samples)) / np.ptp(pca_samples)
    # Remove ticks
    for i, j in itertools.product(range(size_figure_grid), range(size_figure_grid)):
        ax[i, j].get_xaxis().set_visible(False)
        ax[i, j].get_yaxis().set_visible(False)
    # Fill images
    for k in range(5 * 5):
        i = k // 5
        j = k % 5
        ax[i, j].cla()
        if dset == 'MNIST':
            ax[i, j].imshow(np.reshape(pca_samples[k], (28, 28)), cmap='gray')
        else:
            ax[i, j].imshow(np.reshape(pca_samples[k], (32, 32, 3)))
    label = 'Principal Components: {0}'.format(num_pcs)
    fig.text(0.5, 0.004, label, ha='center')
    # Save before show(), which can clear the figure on some backends
    plt.savefig(path)
    plt.show()
    plt.close(fig)

# Load the dataset
dset = 'CIFAR'
if dset == 'MNIST':
    # fetch_mldata was removed from scikit-learn; fetch_openml serves the same data
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    train_set = mnist.data[:].astype(np.float32)
if dset == 'CIFAR':
    (train_set, Y), _ = cifar10.load_data()
    train_set, Y = shuffle(train_set, Y)
    train_set = np.reshape(train_set, (-1, 32 * 32 * 3))
# Mix 'er up
np.random.shuffle(train_set)
# Normalize to [0, 1]
# train_set = (train_set - np.min(train_set)) / (np.max(train_set) - np.min(train_set))
# train_set = (train_set - 0.5) / 0.5  # normalization; range: -1 ~ 1
# train_set = (train_set - np.mean(train_set)) / np.std(train_set)
# Samples to reconstruct in the PCA visualization
fixed_sample_batch = train_set[:25]
# Fit PCA on the training set
pca = PCA(n_components=n_pcs)
pca.fit(train_set)
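# Optional sanity check (explained_variance_ratio_ is a standard attribute of a
# fitted sklearn PCA): how much of the data's variance the retained components capture.
print('Variance captured by the first {0} PCs: {1:.4f}'.format(
    n_pcs, float(np.sum(pca.explained_variance_ratio_))))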

# Create the output directories
if not os.path.isdir('PCA_Reconstruction'):
    os.mkdir('PCA_Reconstruction')
if not os.path.isdir('PCA_Reconstruction/' + dset):
    os.mkdir('PCA_Reconstruction/' + dset)
if not os.path.isdir('PCA_Reconstruction/' + dset + '/{0}_PCs'.format(n_pcs)):
    os.mkdir('PCA_Reconstruction/' + dset + '/{0}_PCs'.format(n_pcs))
# Save the target grid (the original samples, no PCA reconstruction)
show_pca(0, 'PCA_Reconstruction/' + dset + '/' + str(n_pcs) + '_PCs/target.png', True)
# import pdb; pdb.set_trace()  # optional: pause here to inspect the fitted PCA before writing frames
# Save one grid per number of retained principal components
for i in range(n_pcs):
    path = ('PCA_Reconstruction/' + dset + '/' + str(n_pcs) +
            '_PCs/Principal_Component_' + str(i + 1) + '.png')
    show_pca(i + 1, path)
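
# A hypothetical follow-up (a sketch, defined here but never called): stitch the
# saved per-component grids into a single animation with imageio.
def make_reconstruction_gif(out_path='PCA_Reconstruction/' + dset + '/' + str(n_pcs) + '_PCs/reconstruction.gif'):
    frames = []
    for i in range(1, n_pcs + 1):
        frame_path = ('PCA_Reconstruction/' + dset + '/' + str(n_pcs) +
                      '_PCs/Principal_Component_' + str(i) + '.png')
        frames.append(imageio.imread(frame_path))
    imageio.mimsave(out_path, frames)  # imageio picks the GIF writer from the file extension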