Skip to content

Commit

Permalink
Add TSTR evaluation and visualization utilities; rename generated-data directories (vae_gen_data → gen_data_vae, gan_gen_data → gen_data_gan)
Browse files Browse the repository at this point in the history
  • Loading branch information
euisuk-chung committed Dec 6, 2021
1 parent 9930562 commit 7b474c0
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 11 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ data/
etc/
result/
save_model/
vae_gen_data/
gan_gen_data/
gen_data_vae/
gen_data_gan/
tensorboard/

### Notebook ###
Expand Down
3 changes: 2 additions & 1 deletion run_timegan.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@
print('>>>> TRAINING COMPLETE!')
if args.is_generate:
gen_data=timegan_generator(model, args.num_generation, args)
np.save(f'./gan_gen_data/gen_data',gen_data)
np.save(f'./gen_data_gan/gen_data',gen_data)
print('>>>> GENERATION COMPLETE!')



16 changes: 8 additions & 8 deletions run_vrae.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,13 @@

# save original data
train_org = pd.DataFrame(TRAIN_DF if args.undo == True else TRAIN_SCALED, columns= cols)
train_org.to_csv(f'./vae_gen_data/train/original_{args.scale_type}_un_{args.undo}.csv')
print('>> SAVED TRAIN ORIGINAL Data!! (Loc: vae_gen_data)')
train_org.to_csv(f'./gen_data_vae/train/original_{args.scale_type}_un_{args.undo}.csv')
print('>> SAVED TRAIN ORIGINAL Data!! (Loc: gen_data_vae)')

# save reconstructed data
train_gen = pd.DataFrame(train_recon, columns= cols)
train_gen.to_csv(f'./vae_gen_data/train/VRAE_{args.scale_type}_un_{args.undo}_hidden_{args.hidden_layer_depth}_win_{args.sequence_length}_ep_{args.n_epochs}.csv')
print('>> SAVED TRAIN RECONSTRUCTED Data!! (Loc: vae_gen_data)')
train_gen.to_csv(f'./gen_data_vae/train/VRAE_{args.scale_type}_un_{args.undo}_hidden_{args.hidden_layer_depth}_win_{args.sequence_length}_ep_{args.n_epochs}.csv')
print('>> SAVED TRAIN RECONSTRUCTED Data!! (Loc: gen_data_vae)')

# TEST dataset reconstruction
if args.is_generate_test:
Expand All @@ -132,13 +132,13 @@

# save original data
test_org = pd.DataFrame(TRAIN_DF if args.undo == True else TRAIN_SCALED, columns= cols)
test_org.to_csv(f'./vae_gen_data/test/original_{args.scale_type}_un_{args.undo}.csv')
print('>> SAVED TEST ORIGINAL Data!! (Loc: vae_gen_data)')
test_org.to_csv(f'./gen_data_vae/test/original_{args.scale_type}_un_{args.undo}.csv')
print('>> SAVED TEST ORIGINAL Data!! (Loc: gen_data_vae)')

# save reconstructed data
test_gen = pd.DataFrame(test_recon, columns= cols)
test_gen.to_csv(f'./vae_gen_data/test/VRAE_{args.scale_type}_un_{args.undo}_hidden_{args.hidden_layer_depth}_win_{args.sequence_length}_ep_{args.n_epochs}.csv')
print('>> SAVED TEST RECONSTRUCTED Data!! (Loc: vae_gen_data)')
test_gen.to_csv(f'./gen_data_vae/test/VRAE_{args.scale_type}_un_{args.undo}_hidden_{args.hidden_layer_depth}_win_{args.sequence_length}_ep_{args.n_epochs}.csv')
print('>> SAVED TEST RECONSTRUCTED Data!! (Loc: gen_data_vae)')

# IF Both TRAIN and TEST data reconstruction is conducted
if args.is_generate_train and args.is_generate_test:
Expand Down
155 changes: 155 additions & 0 deletions utils/TSTR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import torch
from tqdm import tqdm, trange
import numpy as np
from sklearn.metrics import accuracy_score, mean_squared_error

class GeneralRNN(torch.nn.Module):
    r"""A general RNN model for time-series prediction.

    Wraps a configurable recurrent backbone (vanilla RNN, LSTM, or GRU)
    followed by a linear projection applied at every time step.

    Args (passed as a dict):
        - model_type: one of "rnn", "lstm", "gru"
        - in_dim: input feature dimension
        - h_dim: hidden state dimension
        - out_dim: output feature dimension
        - n_layers: number of stacked recurrent layers
        - dropout: inter-layer dropout probability
        - bidirectional: whether the recurrent layers are bidirectional
        - max_seq_len: maximum sequence length (stored for reference)
    """

    def __init__(self, args):
        super(GeneralRNN, self).__init__()
        self.model_type = args['model_type']

        self.input_size = args['in_dim']
        self.hidden_size = args['h_dim']
        self.output_size = args['out_dim']
        self.num_layers = args['n_layers']
        self.dropout = args['dropout']
        self.bidirectional = args['bidirectional']

        self.max_seq_len = args['max_seq_len']

        self.rnn_module = self._get_rnn_module(self.model_type)

        self.rnn_layer = self.rnn_module(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            dropout=self.dropout,
            bidirectional=self.bidirectional
        )

        # A bidirectional RNN emits 2*hidden_size features per time step;
        # the original hard-coded hidden_size, which broke bidirectional=True.
        rnn_out_size = self.hidden_size * (2 if self.bidirectional else 1)
        self.linear_layer = torch.nn.Linear(
            in_features=rnn_out_size,
            out_features=self.output_size
        )

    def _get_rnn_module(self, model_type):
        """Map a model-type string to the corresponding torch RNN class."""
        if model_type == "rnn":
            return torch.nn.RNN
        elif model_type == "lstm":
            return torch.nn.LSTM
        elif model_type == "gru":
            return torch.nn.GRU
        # Fail loudly here instead of implicitly returning None, which would
        # surface later as a confusing "'NoneType' is not callable" error.
        raise ValueError(f"Unknown model_type: {model_type!r} "
                         "(expected 'rnn', 'lstm', or 'gru')")

    def forward(self, X):
        """Map X of shape (batch, seq, in_dim) to (batch, seq, out_dim)."""
        # H_o: per-step hidden outputs; H_t: final hidden state (unused here).
        H_o, H_t = self.rnn_layer(X)
        logits = self.linear_layer(H_o)

        return logits

def rmse_error(y_true, y_pred):
    """Root mean squared error over non-masked entries.

    Entries of ``y_true`` below zero are treated as masked labels and
    excluded from the average.

    Args:
        - y_true: true labels
        - y_pred: predictions
    Returns:
        - computed_rmse: RMSE computed over unmasked positions only
    """
    # 1 where the label is valid, 0 where it is masked.
    valid = (y_true >= 0) * 1
    squared_diff = (y_true - y_pred) ** 2
    mean_sq = np.sum(valid * squared_diff) / np.sum(valid)
    return np.sqrt(mean_sq)

def one_step_ahead_prediction(train_data, test_data):
    """Use the previous time-series to predict one-step ahead feature values.

    Trains a GRU predictor that maps each window's first ``seq_len - 1``
    steps to its last ``seq_len - 1`` steps, then evaluates it on the
    test set with :func:`rmse_error`.

    Args:
        - train_data: training time-series, windows of shape (seq_len, dim)
        - test_data: testing time-series, windows of shape (seq_len, dim)
    Returns:
        - perf: sum over test batches of the one-step-ahead RMSE
    """
    # Infer window length / feature dimension from the data instead of
    # hard-coding seq_len=30, dim=92, so other datasets work unchanged.
    sample = train_data[0]
    seq_len, dim = int(sample.shape[-2]), int(sample.shape[-1])
    no = 256  # evaluation batch size

    # Set model parameters
    args = {}
    # Fall back to CPU so this also runs on machines without CUDA.
    args["device"] = "cuda" if torch.cuda.is_available() else "cpu"
    args["task"] = "regression"
    args["model_type"] = "gru"
    args["bidirectional"] = False
    args["epochs"] = 20
    args["batch_size"] = 256
    args["in_dim"] = dim
    args["h_dim"] = dim
    args["out_dim"] = dim
    args["n_layers"] = 3
    args["dropout"] = 0.5
    args["max_seq_len"] = seq_len  # only seq_len - 1 steps are used as input
    args["learning_rate"] = 1e-3
    args["grad_clip_norm"] = 5.0

    # Set training features and labels
    train_dataloader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args["batch_size"],
        shuffle=True
    )

    # Set testing features and labels
    test_dataloader = torch.utils.data.DataLoader(
        test_data,
        batch_size=no,
        shuffle=True
    )
    # Initialize model
    model = GeneralRNN(args)
    model.to(args["device"])
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=args["learning_rate"]
    )

    # Train the predictive model
    model.train()
    logger = trange(args["epochs"], desc=f"Epoch: 0, Loss: 0")
    for epoch in logger:
        running_loss = 0.0

        for train_x in train_dataloader:
            train_x = train_x.to(args["device"])
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward: steps [0, seq_len-1) predict steps [1, seq_len)
            train_p = model(train_x[:, :-1, :])
            loss = criterion(train_p, train_x[:, 1:, :])
            # backward
            loss.backward()
            # optimize
            optimizer.step()

            running_loss += loss.item()

        logger.set_description(f"Epoch: {epoch}, Loss: {running_loss:.4f}")

    # Evaluate the trained model. eval() disables dropout — the original
    # evaluated in train mode, so 0.5 dropout corrupted the reported RMSE.
    model.eval()
    with torch.no_grad():
        perf = 0
        for test_x in test_dataloader:
            test_x = test_x.to(args["device"])
            test_p = model(test_x[:, :-1, :]).cpu()

            test_p = np.reshape(test_p.numpy(), [-1])
            test_y = np.reshape(test_x[:, 1:, :].cpu().numpy(), [-1])

            perf += rmse_error(test_y, test_p)

    return perf
83 changes: 83 additions & 0 deletions utils/visualization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np


def visualization(ori_data, generated_data, analysis):
    """Using PCA or tSNE for generated and original data visualization.

    Each window (seq_len, dim) is collapsed to a seq_len-vector by averaging
    over the feature dimension, then projected to 2D and scatter-plotted.

    Args:
        - ori_data: original data, shape (N, seq_len, dim)
        - generated_data: generated synthetic data, same shape as ori_data
        - analysis: 'tsne' or 'pca'
    """
    # Analysis sample size (for faster computation)
    anal_sample_no = min(1000, len(ori_data))
    idx = np.random.permutation(len(ori_data))[:anal_sample_no]

    # Data preprocessing: subsample the same windows from both sets.
    # NOTE(review): this indexes generated_data with indices drawn from
    # len(ori_data) — assumes both arrays have the same length; confirm.
    ori_data = np.asarray(ori_data)[idx]
    generated_data = np.asarray(generated_data)[idx]

    # Collapse the feature axis: each window becomes its per-step mean.
    # Vectorized replacement for the original per-sample np.concatenate
    # loop, which was accidentally O(n^2); results are identical.
    prep_data = np.mean(ori_data, axis=2)
    prep_data_hat = np.mean(generated_data, axis=2)

    # Visualization parameter: first half original (blue), second synthetic.
    colors = ["tab:blue" for i in range(anal_sample_no)] + ["tab:orange" for i in range(anal_sample_no)]

    if analysis == 'pca':
        # PCA Analysis: fit on original data, project both sets with it.
        pca = PCA(n_components=2)
        pca.fit(prep_data)
        pca_results = pca.transform(prep_data)
        pca_hat_results = pca.transform(prep_data_hat)

        # Plotting
        f, ax = plt.subplots(1)
        plt.scatter(pca_results[:, 0], pca_results[:, 1],
                    c=colors[:anal_sample_no], alpha=0.2, label="Original")
        plt.scatter(pca_hat_results[:, 0], pca_hat_results[:, 1],
                    c=colors[anal_sample_no:], alpha=0.2, label="Synthetic")

        ax.legend()
        plt.title('PCA plot')
        plt.xlabel('x-pca')
        plt.ylabel('y_pca')
        plt.show()

    elif analysis == 'tsne':

        # Do t-SNE Analysis together (t-SNE has no transform(): both sets
        # must be embedded in a single fit).
        prep_data_final = np.concatenate((prep_data, prep_data_hat), axis=0)

        # TSNE analysis
        tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
        tsne_results = tsne.fit_transform(prep_data_final)

        # Plotting
        f, ax = plt.subplots(1)

        plt.scatter(tsne_results[:anal_sample_no, 0], tsne_results[:anal_sample_no, 1],
                    c=colors[:anal_sample_no], alpha=0.2, label="Original")
        plt.scatter(tsne_results[anal_sample_no:, 0], tsne_results[anal_sample_no:, 1],
                    c=colors[anal_sample_no:], alpha=0.2, label="Synthetic")

        ax.legend()

        plt.title('t-SNE plot')
        plt.xlabel('x-tsne')
        plt.ylabel('y_tsne')
        plt.show()

0 comments on commit 7b474c0

Please sign in to comment.