forked from LukasKG/SHL_GAN
-
Notifications
You must be signed in to change notification settings - Fork 1
/
statistical_analysis.py
109 lines (81 loc) · 2.71 KB
/
statistical_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# -*- coding: utf-8 -*-
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
from network import save_fig
import data_source as ds
import GAN
# Shared experiment configuration; get_data() rewrites its 'location' entry
# in place before every load.
params = GAN.get_params(
    name='Loc_analysis',
    log_name='Loc_analysis',
    FX_sel='basic',
)

# Entry of ds.FEATURES selected by the configured 'FX_sel' key.
features = ds.FEATURES[params['FX_sel']]

# Values assigned to params['location'] in turn; kept as a list because
# plot() concatenates it with another list to build the legend.
locations = ['bag', 'hand', 'hips', 'torso']
def load_data(params, dataset):
    ''' Load `dataset` through ds.load_data and keep only the first item.

    ds.load_data is expected to return a pair; the second element is
    discarded here.
    '''
    samples, _unused = ds.load_data(params, dataset)
    return samples
def get_data(dset):
    ''' Collect [mean, std] (both along axis 0) for every location.

    One entry is produced per item of `locations` using the split `dset`,
    followed by a final entry for location 'test' loaded from the 'test'
    split. The entries are stacked into a single numpy array.

    Side effect: the module-level `params['location']` is overwritten for
    each load and is left set to 'test' when the function returns.
    '''
    def _summarise(location, split):
        # Point the shared params at the requested location, then reduce
        # the loaded samples to their axis-0 mean and standard deviation.
        params['location'] = location
        samples = load_data(params, split)
        return [np.mean(samples, axis=0), np.std(samples, axis=0)]

    summaries = [_summarise(loc, dset) for loc in locations]
    summaries.append(_summarise('test', 'test'))
    return np.array(summaries)
def normalize(data):
    ''' x normalized = (x - x minimum) / (x maximum - x minimum)

    Min-max scales `data` along its first axis: for every (mode, feature)
    pair, i.e. every index into axes 1 and 2, the values across axis 0 are
    mapped onto [0, 1].

    A column whose values are all identical has a zero range; it is mapped
    to zeros instead of dividing by zero (which would produce NaN). This
    matches how standardize() treats a column with zero standard deviation.

    Parameters:
        data: 3-D numpy array indexed as [location, mode, feature].

    Returns:
        Array with the same shape and dtype as `data`.
    '''
    data = np.asarray(data)
    x_min = data.min(axis=0, keepdims=True)
    x_range = data.max(axis=0, keepdims=True) - x_min
    # Where the range is zero the numerator (x - x_min) is zero as well, so
    # dividing by 1 yields the desired 0 without a 0/0 warning or NaN.
    safe_range = np.where(x_range == 0, 1, x_range)
    scaled = (data - x_min) / safe_range
    # Keep the input dtype, as the original zeros_like-based buffer did.
    return scaled.astype(data.dtype)
def standardize(data):
    ''' x standardized = (x - u) / o

    For every (mode, feature) pair, i.e. every index into axes 1 and 2 of
    `data`, the values across axis 0 are shifted by their mean `u` and
    divided by their standard deviation `o`. A column whose standard
    deviation is exactly 0.0 is set to zeros, and the column together with
    its mean and standard deviation is printed.

    Returns a new array created with np.zeros_like(data).
    '''
    result = np.zeros_like(data)
    # np.ndindex walks (mode, feature) with the feature index varying
    # fastest, i.e. the same order as two nested range loops.
    for y, z in np.ndindex(data.shape[1], data.shape[2]):
        column = data[:, y, z]
        u = column.mean()
        o = column.std()
        if o != 0.0:
            result[:, y, z] = (column - u) / o
            continue
        # Degenerate column: no spread to scale by, so emit zeros and report.
        result[:, y, z] = np.zeros_like(column)
        print(column, u, o)
    return result
def plot(data, name):
    ''' Draw one error-bar series per entry of `data` and save the figure.

    For each entry, element 0 supplies the marker values and element 1 the
    error-bar lengths; the x positions are 1..data.shape[2]. The legend is
    `locations` followed by 'target'. The current pyplot figure is handed
    to save_fig together with the module-level `params` and `name`.
    '''
    n_features = data.shape[2]
    positions = range(1, n_features + 1)

    # NOTE(review): everything is drawn onto the current pyplot figure;
    # whether it is cleared between calls depends on save_fig - confirm.
    for entry in data:
        values = entry[0]
        errors = entry[1]
        plt.errorbar(positions, values, errors,
                     linestyle='None', marker='x', alpha=0.8)

    plt.legend(locations + ['target'])
    plt.xlabel('Feature')
    plt.ylabel('Value')
    # Leave one unit of padding on both sides of the feature positions.
    plt.xlim(0, n_features + 1)
    plt.grid()

    save_fig(params, name, plt.gcf())
def calc_dis(data):
    ''' Print the Euclidean distance from each location to the target.

    The target is element 0 of the entry at index len(locations), i.e. the
    entry right after those belonging to `locations`. For every location
    the square root of the summed squared differences between the target
    and that location's element 0 is printed.
    '''
    target = data[len(locations), 0]
    for loc, entry in zip(locations, data):
        squared_error = (target - entry[0]) ** 2
        diff = np.sum(squared_error) ** (0.5)
        print(" - diff:", loc, '=', diff)
def run(dset):
    ''' Run the full analysis for the split `dset`.

    Gathers the per-location statistics, rescales them with normalize()
    and standardize(), saves one plot per rescaling (named `dset` plus
    '_normalized' / '_standardized'), then prints the distances to the
    target for each rescaling under its own heading.
    '''
    stats = get_data(dset)
    # (heading, plot-name suffix, rescaled data); both rescalings are
    # computed up front, before any plotting or printing of headings.
    variants = (
        ("Normalize", '_normalized', normalize(stats)),
        ("\nStandardize", '_standardized', standardize(stats)),
    )

    for _heading, suffix, scaled in variants:
        plot(scaled, dset + suffix)

    for heading, _suffix, scaled in variants:
        print(heading)
        calc_dis(scaled)
if __name__ == "__main__":
    # Analyse the two splits one after the other, in this order.
    for split in ('train', 'validation'):
        run(split)