diff --git a/README.md b/README.md
index a344739..a09f4ba 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,6 @@ python run_vrae.py
-- Code author : 박경찬
- pyTorch implementation for `TimeGAN`
- Code Reference : https://github.com/d9n13lt4n/timegan-pytorch
@@ -37,7 +36,6 @@ python run_vrae.py
-- Code author : 정의석
- pyTorch implementation for `VRAE`
- Code Reference : https://github.com/tejaslodaya/timeseries-clustering-vae
@@ -68,7 +66,10 @@ TimeGAN has 2 Modes, which is used to decide whether to train or generate :
VRAE has 3 modes, which decide whether to train, generate (train), or generate (test):
1. is_train (default = True) : train the model on the loaded train data (window_size=30, stride=1)
2. is_generate_train (default = True) : generate the train dataset, loaded sequentially (window_size = stride)
-3. is_generate_test (default = True) : generate test dataset loaded sequentially (window_size=stride)
+3. is_generate_test (default = False) : generate the test dataset, loaded sequentially (window_size = stride)
+
+✨ The train/test split query in this code is currently tailored to my side project.
+✨ If you want a train/test split, go to `utils.custom_dataset` and change the query; an example run is shown below.
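+
+✨ Example invocation (a sketch; the flag values assume the Netis data layout, where `Time` and `label` are non-feature columns and the sampling interval is 100):
+```
+python run_vrae.py --file_name netis --split True --is_generate_test True --cols_to_remove Time label --time_gap 100
+```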
```
# Mode 1 : Train mode
diff --git a/config_vrae.py b/config_vrae.py
index c462cbc..7c6fe31 100644
--- a/config_vrae.py
+++ b/config_vrae.py
@@ -29,7 +29,22 @@ def parser_setting(parser):
parser.add_argument(
'--file_name',
default='netis',
- type=str)
+ type=str)
+    parser.add_argument(
+        '--cols_to_remove',
+        default=['Time'],
+        type=str,
+        nargs='*',
+        help='Columns to remove')
+ parser.add_argument(
+ "--split",
+ type=str2bool,
+ default=False,
+        help='Whether to perform a train/test split')
+ parser.add_argument(
+    parser.add_argument(
+        '--time_gap',
+        default=100,
+        type=int,
+        help='Expected gap between consecutive timestamps')
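+    # NOTE: --cols_to_remove accepts zero or more column names,
+    # e.g. `python run_vrae.py --cols_to_remove Time label`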
# train/generate argument
parser.add_argument(
@@ -40,10 +55,12 @@ def parser_setting(parser):
"--is_generate_train",
type=str2bool,
default=True)
+
+ # TODO : For further testing
parser.add_argument(
"--is_generate_test",
type=str2bool,
- default=True)
+ default=False)
# rescaling argument
parser.add_argument(
diff --git a/models/vrae.py b/models/vrae.py
index 4d076fa..ade24df 100644
--- a/models/vrae.py
+++ b/models/vrae.py
@@ -327,7 +327,7 @@ def _train(self, train_loader):
if (t + 1) % self.print_every == 0:
print('Batch %d, loss = %.4f, recon_loss = %.4f, kl_loss = %.4f' % (t + 1, loss.item(),
recon_loss.item(), kl_loss.item()))
-
+
print('Average loss: {:.4f}'.format(epoch_loss / t))
return epoch_loss / t
diff --git a/run_vrae.py b/run_vrae.py
index 0eeec1c..846ec4d 100644
--- a/run_vrae.py
+++ b/run_vrae.py
@@ -38,21 +38,23 @@
WINDOW_SIZE = args.window_size # Window size
scale_type = args.scale_type # scaler type ('Standard' or 'MinMax' or 'Robust')
undo = args.undo # whether to unscale during reconstruction
+cols_to_remove = args.cols_to_remove # columns to remove
+split = args.split # whether to perform a train/test split
+time_gap = args.time_gap # data collection interval (gap between consecutive timestamps)
+
# Load & Scale data
-TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler = load_gen_data(file_name = file_name, scale_type = scale_type)
+TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler = load_gen_data(file_name = file_name,
+                                                                                                   scale_type = scale_type,
+                                                                                                   cols_to_remove = cols_to_remove,
+                                                                                                   split = split)
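+# NOTE: when split=False, TRAIN_* and TEST_* all refer to the full dataset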
# under custom_dataset.py
## Train dataset with stride
-train_dataset = NetisDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride =1)
-
-## Test dataset with no window collapse (for generation window size must be WINDOW_SIZE)
-train_gen_dataset = NetisDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE)
-
-test_gen_dataset = NetisDataset(data = TEST_SCALED, timestamps = TEST_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE)
+train_dataset = TimeSeriesDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride = 1, time_gap = time_gap)
# SET ARGUMENTS
-args.dload = "./save_model"
+args.dload = "./save_model_test" # CHANGE BACK TO NORMAL AFTER TESTING
args.sequence_length = WINDOW_SIZE
args.number_of_features = train_dataset[0].shape[1]
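+# each sample is a (window_size, n_features) tensor, so the feature count is read from the first window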
@@ -78,6 +80,10 @@
# TRAIN dataset reconstruction
if args.is_generate_train:
+
+ # define train generation data
+ train_gen_dataset = TimeSeriesDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE, time_gap = time_gap)
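+    # stride == window_size -> windows do not overlap, so the series is reconstructed sequentially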
+
# FOR GENERATION MUST HAVE batch_size 1
args.batch_size = 1
@@ -128,6 +134,10 @@
# TEST dataset reconstruction
if args.is_generate_test:
+
+ # define test generation data
+ test_gen_dataset = TimeSeriesDataset(data = TEST_SCALED, timestamps = TEST_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE, time_gap = time_gap)
+
# FOR GENERATION MUST HAVE batch_size 1
args.batch_size = 1
diff --git a/save_model_test/VRAE_MinMax_un_False_hidden_1_win_10_ep_10.pth b/save_model_test/VRAE_MinMax_un_False_hidden_1_win_10_ep_10.pth
new file mode 100644
index 0000000..11e027a
Binary files /dev/null and b/save_model_test/VRAE_MinMax_un_False_hidden_1_win_10_ep_10.pth differ
diff --git a/save_model_test/VRAE_MinMax_un_False_hidden_1_win_30_ep_1.pth b/save_model_test/VRAE_MinMax_un_False_hidden_1_win_30_ep_1.pth
new file mode 100644
index 0000000..f62e8b9
Binary files /dev/null and b/save_model_test/VRAE_MinMax_un_False_hidden_1_win_30_ep_1.pth differ
diff --git a/save_model_test/VRAE_MinMax_un_False_hidden_1_win_5_ep_5.pth b/save_model_test/VRAE_MinMax_un_False_hidden_1_win_5_ep_5.pth
new file mode 100644
index 0000000..7de8586
Binary files /dev/null and b/save_model_test/VRAE_MinMax_un_False_hidden_1_win_5_ep_5.pth differ
diff --git a/utils/custom_dataset.py b/utils/custom_dataset.py
index 11917cc..6ecc801 100644
--- a/utils/custom_dataset.py
+++ b/utils/custom_dataset.py
@@ -20,43 +20,50 @@ def fix_seed(seed: int) -> None:
random.seed(seed)
# load generation data
-def load_gen_data(file_name, scale_type = 'Standard', cols_to_remove = None):
+def load_gen_data(file_name, scale_type = 'MinMax',
+                  cols_to_remove = ['Time'], split = False):
"""
file_name: file_name in data location
+    scale_type : scaler type ('Standard', 'MinMax', or 'Robust')
"""
- # define path(must be in pkl file)
- data_loc = f'./data/netis/{file_name}.pkl'
+    # define path (must be under the data folder, as a .pkl file)
+ data_loc = f'./data/{file_name}.pkl'
# get data
with open(data_loc, 'rb') as f:
df = pickle.load(f)
- # if needed remove columns that is not necessary
- if cols_to_remove != None:
- df = df_total.drop(cols_to_remove, axis=1)
-
+    # drop rows with missing values
df = df.dropna()
- # TRAIN TEST SPLIT
- # TRAIN
- TRAIN_DF = df.query('Time < 20211103184400 or Time > 20211106084400 and label==0')
-
-    # TEST (keep only the normal data)
- TEST_DF = df.query('Time >= 20211103184400 and Time <= 20211106084400 and label==0')
-
- TOTAL_DF = df.to_numpy()
-
- # REMOVE TIME & LABEL
+    # if no train/test split is requested, use the full dataset for both
+    if split:
+        # this query is written for the Netis data; adjust the train/test split to fit your own domain
+        TOTAL_DF = df
+        TRAIN_DF = df.query('Time < 20211103184400 or Time > 20211106084400 and label==0')
+        TEST_DF = df.query('Time >= 20211103184400 and Time <= 20211106084400 and label==0')
+    else:
+        TOTAL_DF = df
+        TRAIN_DF = df
+        TEST_DF = df
+
+    # store the time information
TRAIN_Time = TRAIN_DF['Time']
TEST_Time = TEST_DF['Time']
- # remove time & label
- TRAIN_DF = TRAIN_DF.iloc[:,1:-1]
- TEST_DF = TEST_DF.iloc[:,1:-1]
+    # remove unnecessary columns, if specified
+    if cols_to_remove is not None:
+ TOTAL_DF = TOTAL_DF.drop(cols_to_remove, axis=1)
+ TRAIN_DF = TRAIN_DF.drop(cols_to_remove, axis=1)
+ TEST_DF = TEST_DF.drop(cols_to_remove, axis=1)
- cols = TRAIN_DF.columns
+ # Get column Info
+ cols = TOTAL_DF.columns
+ # To numpy
+ TOTAL_DF = TOTAL_DF.to_numpy()
TRAIN_DF = TRAIN_DF.to_numpy()
TEST_DF = TEST_DF.to_numpy()
@@ -74,22 +81,9 @@ def load_gen_data(file_name, scale_type = 'Standard', cols_to_remove = None):
return TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler
-# with no window collapsing
-class GenerationDataset(Dataset):
- def __init__(self, data, window):
- self.data = torch.Tensor(data)
- self.window = window
-
- def __len__(self):
- return len(self.data) // self.window # -1
-
- def __getitem__(self, index):
- x = self.data[index*self.window:(index+1)*(self.window)]
- return x
-
# loader with stride
-class NetisDataset(Dataset):
- def __init__(self, data, timestamps, window_size, stride=1):
+class TimeSeriesDataset(Dataset):
+ def __init__(self, data, timestamps, window_size, stride=1, time_gap=100):
self.data = torch.from_numpy(np.array(data))
self.ts = np.array(timestamps)
self.valid_idxs = []
@@ -100,7 +94,7 @@ def __init__(self, data, timestamps, window_size, stride=1):
R = L + self.window_size - 1
            # append valid indices
- if self.ts[R]-self.ts[L] == (self.window_size-1)*100:
+ if self.ts[R]-self.ts[L] == (self.window_size-1)*time_gap:
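+                # keep window [L, R] only if its first and last timestamps are exactly
+                # (window_size-1)*time_gap apart (no missing rows, assuming a fixed sampling interval)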
self.valid_idxs.append(L)
self.valid_idxs = np.array(self.valid_idxs, dtype=np.int32)[::stride]
@@ -116,3 +110,15 @@ def __getitem__(self, index):
return x.float()
+# with no window collapsing
+class GenerationDataset(Dataset):
+ def __init__(self, data, window):
+ self.data = torch.Tensor(data)
+ self.window = window
+
+ def __len__(self):
+        return len(self.data) // self.window  # number of complete windows (a trailing partial window is dropped)
+
+ def __getitem__(self, index):
+ x = self.data[index*self.window:(index+1)*(self.window)]
+ return x
\ No newline at end of file