-
Notifications
You must be signed in to change notification settings - Fork 1
/
AE_torch.py
148 lines (114 loc) · 4.02 KB
/
AE_torch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# -*- coding: utf-8 -*-
import torch
import torch.utils.data as utils
from torch.autograd import Variable
# general helper libraries
import pathlib
import os
import pandas as pd
import numpy as np
from numpy.random import seed # numpy random number set function
# Seed NumPy's and PyTorch's global random number generators with the same
# fixed value so that repeated runs of this script are reproducible.
RANDOM_SEED = 9990
seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
# Load the data: pandas parses the header-less CSV, the frame is converted to
# a NumPy array, and the array is wrapped as a torch tensor.
features = pd.read_csv("data/AEdata.csv", delimiter=",", header=None)
labels = pd.read_csv("data/AEdata.csv", delimiter=",", header=None)

featuresnp = features.to_numpy()
labelsnp = labels.to_numpy()

# The model is switched to double precision further down, so both inputs and
# targets are cast to float64 here to match.
x = torch.from_numpy(featuresnp).double()
# Targets are read from the same file as the inputs (the network is trained
# to reproduce its own input).
y = torch.from_numpy(labelsnp).double()

print(y.shape)
print(x.shape)

# Pair inputs with targets and serve them in mini-batches of up to 438 rows.
my_dataset = utils.TensorDataset(x, y)
dataset = utils.DataLoader(my_dataset, batch_size=438)
# Linear autoencoder: 8 inputs -> 1-unit bottleneck -> 8 outputs. Each linear
# layer is followed by an Identity module, i.e. no non-linearity is applied.
model = torch.nn.Sequential(
    torch.nn.Linear(8, 1),   # encoder
    torch.nn.Identity(),
    torch.nn.Linear(1, 8),   # decoder
    torch.nn.Identity(),
)
print(model)
print(model[:2])  # the encoder half only

# Training hyper-parameters and stopping criteria.
learning_rate = 1e-3
abserror = 1e-05         # stop once the epoch loss changes by less than this
maxiters = 100000        # upper bound on the number of epochs
curloss = 1e+300         # previous epoch's loss; starts at a huge sentinel
minLossOverall = 1e+300

# Mean squared error loss, float64 weights, Adam optimizer.
loss_fn = torch.nn.MSELoss(reduction='mean')
model = model.double()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train for at most `maxiters` epochs, stopping early once the epoch loss
# changes by less than `abserror` from one epoch to the next.
for t in range(maxiters):  # one epoch = one full pass over the training data
    running_loss = 0.0
    # Loop variables are named batch_* so the builtin `input` is not shadowed.
    for batch_inputs, batch_targets in dataset:
        # One optimizer step per mini-batch.
        optimizer.zero_grad()
        batch_output = model(batch_inputs)
        loss = loss_fn(batch_output, batch_targets)
        loss.backward()
        optimizer.step()
        # Accumulate the per-batch losses into this epoch's total.
        running_loss += loss.item()

    if np.absolute(running_loss - curloss) < abserror:
        # have good enough solution so stop
        print("BREAK: iter=", t, " ", "loss=", running_loss, "\n")
        break
    curloss = running_loss  # remember this epoch's loss for the next comparison

    # Progress report every 100 epochs.
    if t % 100 == 0:
        print(t, curloss)
# Dump every trainable parameter (name and values) of the fitted model.
print("---PARAMETERS-----\n")
for param_name, tensor in model.named_parameters():
    if not tensor.requires_grad:
        continue
    print(param_name, tensor.data)
# Run the trained model over the full data set and recompute the squared
# reconstruction error by hand as a cross-check on the training loss.
preds = model(x)
prednp = preds.detach().numpy()

# Sum of squared differences over every element, computed in one vectorized
# NumPy expression instead of a Python-level double loop.
myloss = np.sum((prednp - labelsnp) ** 2)

# NOTE(review): the total is divided by the number of rows only, not by the
# number of elements, so this is the summed squared error per observation
# rather than the element-wise mean that MSELoss(reduction='mean') reports.
print("MANUAL LOSS=", myloss / y.shape[0], "\n")
# Split the trained network into its two halves and push the data through
# each half separately.
trained_layers = list(model.children())

# Encoder: the first two modules of the trained model.
new_model = torch.nn.Sequential(*trained_layers[:2])
encodepreds2dim = new_model(x)

# Trainable parameters of the encoder.
print("---PARAMETERS-----\n")
for param_name, tensor in new_model.named_parameters():
    if not tensor.requires_grad:
        continue
    print(param_name, tensor.data)

# Decoder: the last two modules (indices 2 and 3), fed with the encoder output.
new_model2 = torch.nn.Sequential(*trained_layers[2:4])
decodepreds8dim = new_model2(encodepreds2dim)

# Trainable parameters of the decoder.
print("---PARAMETERS-----\n")
for param_name, tensor in new_model2.named_parameters():
    if not tensor.requires_grad:
        continue
    print(param_name, tensor.data)
print("-----------\n")
# Convert the encoder and decoder outputs to NumPy arrays and preview the
# first ten rows of each.
encodepreds2dimnp = encodepreds2dim.detach().numpy()
print(encodepreds2dimnp[0:10:1, :])
decodepreds8dimnp = decodepreds8dim.detach().numpy()
print(decodepreds8dimnp[0:10:1, :])

# Wrap each array in a DataFrame (built once each) and write it to CSV
# without the index column.
encodepd = pd.DataFrame(data=encodepreds2dimnp)
decodepd = pd.DataFrame(data=decodepreds8dimnp)
encodepd.to_csv("torch_encoded.csv", encoding='utf-8', index=False)
decodepd.to_csv("torch_decoded.csv", encoding='utf-8', index=False)