diff --git a/README.md b/README.md
index a09f4ba..125a7fb 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Time Series Generation
 Github Repo for timeseries generation (시계열 생성)
 
-## Purpose
+## 1. Purpose
 
 Time Series Generation can be used for multiple purposes
 For example :
@@ -9,76 +9,137 @@ For example :
 - Generating Simulations
 
-## How to use
-1. TimeGAN을 이용한 시계열 생성
+## 2. How to use
 
-```python
-python run_timegan.py
+
+- See `4. Model Arguments` below for details on each argument.
+
+### 2.1. Time Series Generation using TimeGAN
+```python
+# Example
+python run_timegan.py --file_name test_data --cols_to_remove Time MNG_NO --time_gap 500 --emb_epochs 10 --sup_epochs 10 --gan_epochs 10 --window_size 5
 ```
-2. VRAE를 이용한 시계열 생성
+### 2.2. Time Series Generation using VRAE
 
 ```python
-python run_vrae.py
+
+# Example
+python run_vrae.py --file_name test_data --cols_to_remove Time MNG_NO --time_gap 500 --n_epochs 10 --window_size 5
 ```
 
-## Models Used
-### TimeGAN
+## 3. Models Used
+### 3.1. TimeGAN
 - pyTorch implementation for `TimeGAN`
 - Code Reference : https://github.com/d9n13lt4n/timegan-pytorch
 
-### Variational Recurrent AutoEncoder (VRAE)
+### 3.2. Variational Recurrent AutoEncoder (VRAE)
 - pyTorch implementation for `VRAE`
 - Code Reference : https://github.com/tejaslodaya/timeseries-clustering-vae
 
-## CAUTIONS!
+## 4. Model Arguments
+
+- TimeGAN and VRAE share several arguments, but many arguments are model-specific.
+- The full argument list for each model is defined in `config_timegan.py` and `config_vrae.py`.
+
+### 4.1. Shared Arguments
 
 Training method for each model are the same, which uses dataset that is loaded by moving sliding window(default=30) with certain stride(default=1).
-However, the generation method for each model are different! See below for more detail.
+There are a few things you need to know before running the code :
+
+- The generation method differs between the two models :
+  - TimeGAN generates window-sized time series from `random noise` (no input series required)
+  - VRAE generates window-sized time series from a `given timeseries` and the `trained latent space` (an input series is required)
+
+- The query for the train/test split is currently tailored to my side project.
+  - If you want a train/test split, edit the query in `utils.custom_dataset`.
+  - For generation purposes, you can ignore the split and leave `split` at its default (`False`).
+
+The shared arguments are listed below; a short sketch of the sliding-window loading follows the list :
+```
+--file_name file_name        # name of the data file to use
+
+--cols_to_remove Var1 Var2   # columns to exclude from the analysis (e.g. time column, index column)
+
+--time_gap 500               # data collection gap (sampling interval)
+
+--window_size 10             # window size used for training
+
+```
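+For intuition, the training windows are built roughly as in the sketch below. This is only an illustration with a hypothetical `make_windows` helper, not the actual loader in `utils.custom_dataset` :
+
+```python
+import numpy as np
+
+def make_windows(series, window_size=30, stride=1):
+    """Cut a (time_steps, n_features) array into overlapping windows of shape (n_windows, window_size, n_features)."""
+    windows = [series[start:start + window_size]
+               for start in range(0, len(series) - window_size + 1, stride)]
+    return np.stack(windows)
+
+# e.g. 100 time steps with 3 features -> 71 windows of shape (30, 3) with the defaults
+windows = make_windows(np.random.rand(100, 3))
+```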
+
+### 4.2. TimeGAN Arguments
+
+TimeGAN has the following modes (see `config_timegan.py` for the full argument list) :
 
-### TimeGAN
-TimeGAN has 2 Modes, which is used to decide whether to train or generate :
 1. is_train (default = True) : train model with loaded train data (window_size=30, stride=1)
 2. is_generate (default = True) : generate multiple(num_generation) sequences of window (window_size=30)
 
 ```
-# Mode 1 : Train mode
---is_train # train timeGAN
+--is_train `True` or `False`    # train the model on the train data
+
+--num_generation 100            # number of data windows to generate
+
+--is_generate `True` or `False` # generate data from the trained latent space
 
-# Mode 2 : Generation mode
---is_generate # generate window size sequences
---num_generation # number of sequences to make
+--emb_epochs 3000               # number of epochs for embedding (autoencoder) training
+
+--sup_epochs 3000               # number of epochs for supervisor training
+
+--gan_epochs 3000               # number of epochs for GAN training
 ```
 
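+For example, to train TimeGAN and then generate 100 windows in a single run (the values below are only illustrative) :
+
+```python
+# Example
+python run_timegan.py --file_name test_data --cols_to_remove Time MNG_NO --time_gap 500 --window_size 5 --emb_epochs 10 --sup_epochs 10 --gan_epochs 10 --is_train True --is_generate True --num_generation 100
+```
+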
-### Variational Recurrent AutoEncoder (VRAE)
-VRAE has 3 Modes, which is used to decide whether to train or generate(train) or generate(test) :
-1. is_train (default = True) : train model with loaded train data (window_size=30, stride=1)
-2. is_generate_train (default = True) : generate train dataset loaded sequentially (window_size=stride)
-3. is_generate_test (default = False) : generate test dataset loaded sequentially (window_size=stride)
+### 4.3. Variational Recurrent AutoEncoder (VRAE) Arguments
 
-✨ The query for train/test split in my code is currently used for my side-project.
-✨ If you want to use train/test you need to go to `utils.custom_dataset` and change the query.
+VRAE has the following modes (see `config_vrae.py` for the full argument list) :
+
+1. is_train (default = True) : train model with loaded train data (window_size=30, stride=1)
+2. is_generate_train (default = True) : generate the train dataset, loaded sequentially (stride=window_size)
+3. is_generate_test (default = False) : generate the test dataset, loaded sequentially (stride=window_size)
 
 ```
-# Mode 1 : Train mode
---is_train # train VRAE
+--is_train `True` or `False`          # train the model on the train data
+
+--is_generate_train `True` or `False` # generate data from the trained latent space and the train data
 
-# Mode 2 : Train Generation mode
---is_generate_train # generate train dataset
+--is_generate_test `True` or `False`  # generate data from the trained latent space and the test data (experimental : not needed for plain generation)
 
-# Mode 3 : Test Generation mode
---is_generate_test # generate test dataset
+--n_epochs 2000                       # number of training epochs
 ```
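+
+For example, to train VRAE and then regenerate the train windows in a single run (the values below are only illustrative) :
+
+```python
+# Example
+python run_vrae.py --file_name test_data --cols_to_remove Time MNG_NO --time_gap 500 --window_size 5 --n_epochs 10 --is_train True --is_generate_train True
+```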
+
+## Repository Structure
+```
+├── data
+│   └── the data you want to use (in .pkl)
+├── gen_data_gan
+│   └── where the TimeGAN-generated data is saved
+├── gen_data_vae
+│   └── where the VRAE-generated data is saved
+├── models
+│   ├── TimeGAN.py
+│   └── vrae.py
+├── save_model
+│   └── where trained model parameters are saved
+├── utils
+│   ├── TSTR.py             # TSTR (TRTS) code
+│   ├── custom_dataset.py   # data loading code
+│   ├── utils_timegan.py    # util functions for TimeGAN
+│   ├── utils_vrae.py       # util functions for VRAE
+│   ├── visual_timegan.py   # visualization functions for TimeGAN
+│   └── visual_vrae.py      # visualization functions for VRAE
+├── run_timegan.py
+├── run_vrae.py
+├── config_timegan.py
+├── config_vrae.py
+```
diff --git a/config_timegan.py b/config_timegan.py
index 7631742..838a0b4 100644
--- a/config_timegan.py
+++ b/config_timegan.py
@@ -30,6 +30,21 @@ def parser_setting(parser):
         '--file_name',
         default='netis',
         type=str)
+    parser.add_argument(
+        '--cols_to_remove',
+        default='Time',
+        type=str,
+        nargs='*',
+        help = 'Columns to Remove')
+    parser.add_argument(
+        "--split",
+        type=str2bool,
+        default=False,
+        help = 'Argument for Train/Test split')
+    parser.add_argument(
+        '--time_gap',
+        default=100,
+        type=int)
 
     # train/generate argument
     parser.add_argument(
@@ -113,7 +128,7 @@ def parser_setting(parser):
         default=1e-3,
         type=float)
     parser.add_argument(
-        '--model_path',
+        '--dload',
         default="save_model",
         type=str)
     return parser
diff --git a/run_timegan.py b/run_timegan.py
index 604fc25..756e6df 100644
--- a/run_timegan.py
+++ b/run_timegan.py
@@ -28,22 +28,30 @@
 from utils.custom_dataset import *
 from utils.utils_timegan import *
 
-args = config.get_config() # argument 호출
-fix_seed(args.seed) # seed 고정
+# load arguments
+args = config.get_config()
+
+# fix random seed
+fix_seed(args.seed)
+
 file_name = args.file_name # 데이터 파일명
 WINDOW_SIZE = args.window_size # Window size
 scale_type = args.scale_type # Scaler Type 설정 ('Standard' or 'MinMax' or 'Robust')
 undo = args.undo # reconstruction 시 unscale 수행여부
+cols_to_remove = args.cols_to_remove # columns to remove
+split = args.split # whether to do a train/test split
+time_gap = args.time_gap # data collection gap
+
 
 # Load & Scale data
-TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler = load_gen_data(file_name = file_name, scale_type = scale_type)
+TRAIN_DF, TEST_DF, TRAIN_SCALED, TEST_SCALED, TRAIN_Time, TEST_Time, cols, scaler = load_gen_data(file_name = file_name, \
+                                                                                                  scale_type = scale_type, \
+                                                                                                  cols_to_remove = cols_to_remove, \
+                                                                                                  split = split)
 
 # under custom_dataset.py
 ## Train dataset with stride
-train_dataset = NetisDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride =1)
-
-## Test dataset with no window collapse (for generation window size must be WINDOW_SIZE)
-test_dataset = NetisDataset(data = TEST_SCALED, timestamps = TEST_Time, window_size = WINDOW_SIZE, stride = WINDOW_SIZE)
+train_dataset = TimeSeriesDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride =1, time_gap = time_gap)
 
 # SET DEVICE
 device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
@@ -51,8 +59,9 @@
 # SET ARGUMENTS
 args.feature_dim = train_dataset[0].size(1)
 args.Z_dim = train_dataset[0].size(1)
-args.model_path = "save_model"
+args.dload = "./save_model"
 
+# model save directory
 # DEFINE MODEl
 model = TimeGAN(args)
 model = model.to(device)
diff --git a/run_vrae.py b/run_vrae.py
index 846ec4d..00a6836 100644
--- a/run_vrae.py
+++ b/run_vrae.py
@@ -54,7 +54,7 @@
 train_dataset = TimeSeriesDataset(data = TRAIN_SCALED, timestamps = TRAIN_Time, window_size = WINDOW_SIZE, stride =1, time_gap = time_gap)
 
 # SET ARGUMENTS
-args.dload = "./save_model_test" # CHANGE BACK TO NORMAL AFTER TESTING
+args.dload = "./save_model"
 args.sequence_length = WINDOW_SIZE
 args.number_of_features = train_dataset[0].shape[1]
 
diff --git a/utils/utils_timegan.py b/utils/utils_timegan.py
index 38b2e05..c05861d 100644
--- a/utils/utils_timegan.py
+++ b/utils/utils_timegan.py
@@ -235,9 +235,9 @@ def timegan_trainer(model, dataset, args):
 
     # Save model, args, and hyperparameters
    print('SAVING TRAINED MODEL...')
-    torch.save(args, f"{args.model_path}/args.pickle")
-    torch.save(model.state_dict(), f"{args.model_path}/model.pt")
-    print(f"\nSaved at path: {args.model_path}")
+    torch.save(args, f"{args.dload}/args.pickle")
+    torch.save(model.state_dict(), f"{args.dload}/model.pt")
+    print(f"\nSaved at path: {args.dload}")
 
 # timeGAN generator
 def timegan_generator(model, Num, args):
@@ -250,14 +250,14 @@ def timegan_generator(model, Num, args):
     - generated_data (np.ndarray): The synthetic data generated by the model
     """
     # Load model for inference
-    if not os.path.exists(args.model_path):
+    if not os.path.exists(args.dload):
         raise ValueError(f"Model directory not found...")
 
     # Load arguments and model
-    with open(f"{args.model_path}/args.pickle", "rb") as fb:
+    with open(f"{args.dload}/args.pickle", "rb") as fb:
         args = torch.load(fb)
 
-    model.load_state_dict(torch.load(f"{args.model_path}/model.pt"))
+    model.load_state_dict(torch.load(f"{args.dload}/model.pt"))
 
     print("\nGenerating Data...")
     # Initialize model to evaluation mode and run without gradients