code rebuild
euisuk-chung committed Dec 13, 2021
1 parent d16c3ab commit c7ad394
Showing 8 changed files with 85 additions and 51 deletions.
7 changes: 4 additions & 3 deletions README.md
@@ -29,15 +29,13 @@ python run_vrae.py

<img src = 'https://github.com/euisuk-chung/timeseries-generation/blob/main/image/TimeGAN_architecture.PNG?raw=true' width="650" height="400">

- Code author: 박경찬
- PyTorch implementation of `TimeGAN`
- Code reference: https://github.com/d9n13lt4n/timegan-pytorch

### Variational Recurrent AutoEncoder (VRAE)

<img src = 'https://github.com/euisuk-chung/timeseries-generation/blob/main/image/LSTM_VAE_architecture.png?raw=true' width="650" height="400">

- Code author: 정의석
- PyTorch implementation of `VRAE`
- Code reference: https://github.com/tejaslodaya/timeseries-clustering-vae

@@ -68,7 +66,10 @@ TimeGAN has 2 modes, which decide whether to train or generate:
VRAE has 3 modes, which decide whether to train, generate (train), or generate (test):
1. is_train (default = True) : train the model on the loaded train data (window_size=30, stride=1)
2. is_generate_train (default = True) : generate the train dataset, loaded sequentially (window_size=stride)
3. is_generate_test (default = True) : generate the test dataset, loaded sequentially (window_size=stride)
3. is_generate_test (default = False) : generate the test dataset, loaded sequentially (window_size=stride)

✨ The train/test split query in this code is currently tailored to my side project.
✨ If you want your own train/test split, go to `utils.custom_dataset` and change the query.

```
# Mode 1 : Train mode
```
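The truncated block above shows only Mode 1. A fuller sketch of the three invocations (assumed command lines; the flag names come from `config_vrae.py` in this commit) might look like:

```
# Mode 1 : Train mode
python run_vrae.py --is_train True --is_generate_train False --is_generate_test False

# Mode 2 : Generate from the train windows
python run_vrae.py --is_train False --is_generate_train True --is_generate_test False

# Mode 3 : Generate from the test windows (needs --split True and a matching query in utils.custom_dataset)
python run_vrae.py --is_train False --is_generate_train False --is_generate_test True --split True
```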
21 changes: 19 additions & 2 deletions config_vrae.py
@@ -29,7 +29,22 @@ def parser_setting(parser):
parser.add_argument(
'--file_name',
default='netis',
type=str)
parser.add_argument(
'--cols_to_remove',
default='Time',
type=str,
nargs='*',
help='columns to remove')
parser.add_argument(
"--split",
type=str2bool,
default=False,
help='whether to perform the train/test split')
parser.add_argument(
'--time_gap',
default=100,
type=int)

# train/generate argument
parser.add_argument(
@@ -40,10 +55,12 @@
"--is_generate_train",
type=str2bool,
default=True)

# TODO : For further testing
parser.add_argument(
"--is_generate_test",
type=str2bool,
default=True)
default=False)

# rescaling argument
parser.add_argument(
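The `type=str2bool` arguments above depend on a helper that is not shown in this diff. A minimal sketch, assuming the usual argparse idiom (the repository's actual implementation may differ):

```
import argparse

def str2bool(v):
    # Assumed helper -- not part of this commit; the standard argparse idiom.
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
```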
2 changes: 1 addition & 1 deletion models/vrae.py
@@ -327,7 +327,7 @@ def _train(self, train_loader):
if (t + 1) % self.print_every == 0:
print('Batch %d, loss = %.4f, recon_loss = %.4f, kl_loss = %.4f' % (t + 1, loss.item(),
recon_loss.item(), kl_loss.item()))

print('Average loss: {:.4f}'.format(epoch_loss / (t + 1)))  # t is 0-based, so t + 1 = number of batches

return epoch_loss / (t + 1)
26 changes: 18 additions & 8 deletions run_vrae.py
@@ -38,21 +38,23 @@
WINDOW_SIZE = args.window_size # Window size
scale_type = args.scale_type # scaler type ('Standard', 'MinMax', or 'Robust')
undo = args.undo # whether to unscale during reconstruction
cols_to_remove = args.cols_to_remove # columns to remove
split = args.split # whether to apply the train/test split
time_gap = args.time_gap # data collection interval


# Load & Scale data
TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler = load_gen_data(file_name = file_name, scale_type = scale_type)
TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler = load_gen_data(file_name = file_name,
                                                                                                   scale_type = scale_type,
                                                                                                   cols_to_remove = cols_to_remove,
                                                                                                   split = split)

# defined in custom_dataset.py
## Train dataset with stride
train_dataset = NetisDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride = 1)

## Generation datasets with no window overlap (for generation, stride must equal WINDOW_SIZE)
train_gen_dataset = NetisDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE)

test_gen_dataset = NetisDataset(data = TEST_SCALED, timestamps = TEST_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE)
train_dataset = TimeSeriesDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride = 1, time_gap = time_gap)

# SET ARGUMENTS
args.dload = "./save_model"
args.dload = "./save_model_test" # CHANGE BACK TO NORMAL AFTER TESTING
args.sequence_length = WINDOW_SIZE
args.number_of_features = train_dataset[0].shape[1]
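# NOTE: train_dataset[0] is a single window of shape (WINDOW_SIZE, n_features),
# so .shape[1] picks out the feature dimension for the model.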

@@ -78,6 +80,10 @@

# TRAIN dataset reconstruction
if args.is_generate_train:

# define train generation data
train_gen_dataset = TimeSeriesDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE, time_gap = time_gap)

# generation requires batch_size = 1
args.batch_size = 1

@@ -128,6 +134,10 @@

# TEST dataset reconstruction
if args.is_generate_test:

# define test generation data
test_gen_dataset = TimeSeriesDataset(data = TEST_SCALED, timestamps = TEST_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE, time_gap = time_gap)

# generation requires batch_size = 1
args.batch_size = 1

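For context, a minimal sketch of the generation step these blocks gate (the loader setup follows the code above; the `vrae.reconstruct` call is an assumption, the actual API lives in `models/vrae.py`):

```
from torch.utils.data import DataLoader

# Generation walks the series one non-overlapping window at a time,
# which is why batch_size must stay 1.
test_gen_loader = DataLoader(test_gen_dataset, batch_size=1, shuffle=False)

generated = []
for window in test_gen_loader:        # window: (1, WINDOW_SIZE, n_features)
    x_hat = vrae.reconstruct(window)  # assumed reconstruction call
    generated.append(x_hat)
```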
3 binary files not shown.
80 changes: 43 additions & 37 deletions utils/custom_dataset.py
@@ -20,43 +20,50 @@ def fix_seed(seed: int) -> None:
random.seed(seed)

# load generation data
def load_gen_data(file_name, scale_type = 'Standard', cols_to_remove = None):
def load_gen_data(file_name, scale_type = 'MinMax',
                  cols_to_remove = ['Time'], split = False):
"""
file_name : file name under the data folder (loaded from a pkl file)
scale_type : scaling type ('Standard', 'MinMax', or 'Robust')
cols_to_remove : columns to drop (e.g. ['Time'])
split : whether to apply the train/test split query
"""

# define path (must be a pkl file)
data_loc = f'./data/netis/{file_name}.pkl'
# define path (must be under the data folder, as a pkl file)
data_loc = f'./data/{file_name}.pkl'

# get data
with open(data_loc, 'rb') as f:
df = pickle.load(f)

# if needed, drop unnecessary columns
if cols_to_remove is not None:
    df = df.drop(cols_to_remove, axis=1)

# drop missing values
df = df.dropna()

# TRAIN/TEST SPLIT
# TRAIN
TRAIN_DF = df.query('Time < 20211103184400 or Time > 20211106084400 and label==0')

# TEST (keep only normal rows)
TEST_DF = df.query('Time >= 20211103184400 and Time <= 20211106084400 and label==0')

TOTAL_DF = df.to_numpy()

# REMOVE TIME & LABEL
# if the train/test split is disabled, use the full dataset
if split:
    # written for the Netis data; adjust the query below to fit your own domain
    TOTAL_DF = df
    TRAIN_DF = df.query('Time < 20211103184400 or Time > 20211106084400 and label==0')
    TEST_DF = df.query('Time >= 20211103184400 and Time <= 20211106084400 and label==0')
else:
    TOTAL_DF = df
    TRAIN_DF = df
    TEST_DF = df

# save time information
TRAIN_Time = TRAIN_DF['Time']
TEST_Time = TEST_DF['Time']

# remove time & label
TRAIN_DF = TRAIN_DF.iloc[:,1:-1]
TEST_DF = TEST_DF.iloc[:,1:-1]
# drop the specified columns, if any
if cols_to_remove is not None:
    TOTAL_DF = TOTAL_DF.drop(cols_to_remove, axis=1)
    TRAIN_DF = TRAIN_DF.drop(cols_to_remove, axis=1)
    TEST_DF = TEST_DF.drop(cols_to_remove, axis=1)

cols = TRAIN_DF.columns
# get column info
cols = TOTAL_DF.columns

# To numpy
TOTAL_DF = TOTAL_DF.to_numpy()
TRAIN_DF = TRAIN_DF.to_numpy()
TEST_DF = TEST_DF.to_numpy()

@@ -74,22 +81,9 @@ def load_gen_data(file_name, scale_type = 'Standard', cols_to_remove = None):

return TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler
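# Assumed usage (illustrative values; 'netis' resolves to ./data/netis.pkl):
# TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler = \
#     load_gen_data(file_name='netis', scale_type='MinMax',
#                   cols_to_remove=['Time'], split=False)
# With split=False the full dataset is returned as both train and test.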

# with no window collapsing
class GenerationDataset(Dataset):
def __init__(self, data, window):
self.data = torch.Tensor(data)
self.window = window

def __len__(self):
return len(self.data) // self.window # -1

def __getitem__(self, index):
x = self.data[index*self.window:(index+1)*(self.window)]
return x

# loader with stride
class NetisDataset(Dataset):
def __init__(self, data, timestamps, window_size, stride=1):
class TimeSeriesDataset(Dataset):
def __init__(self, data, timestamps, window_size, stride=1, time_gap=100):
self.data = torch.from_numpy(np.array(data))
self.ts = np.array(timestamps)
self.valid_idxs = []
@@ -100,7 +94,7 @@ def __init__(self, data, timestamps, window_size, stride=1):
R = L + self.window_size - 1

# append valid indices
if self.ts[R]-self.ts[L] == (self.window_size-1)*100:
if self.ts[R]-self.ts[L] == (self.window_size-1)*time_gap:
self.valid_idxs.append(L)

self.valid_idxs = np.array(self.valid_idxs, dtype=np.int32)[::stride]
@@ -116,3 +110,15 @@ def __getitem__(self, index):
return x.float()
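# Example (illustrative values): with window_size=30 and time_gap=100, a window
# starting at index L is kept only if ts[L+29] - ts[L] == 29 * 100, i.e. there
# are no missing ticks inside the window:
# train_dataset = TimeSeriesDataset(TRAIN_SCALED, TRAIN_Time, window_size=30,
#                                   stride=1, time_gap=100)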


# with no window collapsing
class GenerationDataset(Dataset):
def __init__(self, data, window):
self.data = torch.Tensor(data)
self.window = window

def __len__(self):
return len(self.data) // self.window # -1

def __getitem__(self, index):
x = self.data[index*self.window:(index+1)*(self.window)]
return x
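# Quick check (illustrative): GenerationDataset simply chunks the series into
# non-overlapping windows and, unlike TimeSeriesDataset, does no timestamp check.
# ds = GenerationDataset(data=np.zeros((100, 5)), window=30)
# len(ds) -> 3; the trailing 100 % 30 = 10 rows are dropped.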
